[ { "loss": 10.586199951171874, "learning_rate": 1.0000000000000001e-07, "epoch": 0.0004022995441946164, "total_flos": 386127310848000, "step": 100 }, { "loss": 10.407359619140625, "learning_rate": 2.0000000000000002e-07, "epoch": 0.0008045990883892328, "total_flos": 770825897533440, "step": 200 }, { "loss": 10.11994140625, "learning_rate": 3.0000000000000004e-07, "epoch": 0.0012068986325838493, "total_flos": 1146022671851520, "step": 300 }, { "loss": 9.836728515625, "learning_rate": 4.0000000000000003e-07, "epoch": 0.0016091981767784656, "total_flos": 1525973007974400, "step": 400 }, { "loss": 9.67935546875, "learning_rate": 5.000000000000001e-07, "epoch": 0.002011497720973082, "total_flos": 1910342297640960, "step": 500 }, { "loss": 9.602734375, "learning_rate": 6.000000000000001e-07, "epoch": 0.0024137972651676986, "total_flos": 2302991813959680, "step": 600 }, { "loss": 9.5771630859375, "learning_rate": 7.000000000000001e-07, "epoch": 0.002816096809362315, "total_flos": 2684599257661440, "step": 700 }, { "loss": 9.5471923828125, "learning_rate": 8.000000000000001e-07, "epoch": 0.0032183963535569312, "total_flos": 3081306563051520, "step": 800 }, { "loss": 9.5011572265625, "learning_rate": 9.000000000000001e-07, "epoch": 0.0036206958977515477, "total_flos": 3464916345077760, "step": 900 }, { "loss": 9.463251953125, "learning_rate": 1.0000000000000002e-06, "epoch": 0.004022995441946164, "total_flos": 3820976713605120, "step": 1000 }, { "loss": 9.3936328125, "learning_rate": 1.1e-06, "epoch": 0.004425294986140781, "total_flos": 4177578828840960, "step": 1100 }, { "loss": 9.408447265625, "learning_rate": 1.2000000000000002e-06, "epoch": 0.004827594530335397, "total_flos": 4562851029688320, "step": 1200 }, { "loss": 9.4076171875, "learning_rate": 1.3e-06, "epoch": 0.005229894074530014, "total_flos": 4960615272284160, "step": 1300 }, { "loss": 9.366455078125, "learning_rate": 1.4000000000000001e-06, "epoch": 0.00563219361872463, "total_flos": 5362835647119360, "step": 1400 }, { "loss": 9.307685546875, "learning_rate": 1.5e-06, "epoch": 0.006034493162919247, "total_flos": 5749318811197440, "step": 1500 }, { "loss": 9.2849609375, "learning_rate": 1.6000000000000001e-06, "epoch": 0.0064367927071138625, "total_flos": 6150668142305280, "step": 1600 }, { "loss": 9.23236328125, "learning_rate": 1.7000000000000002e-06, "epoch": 0.006839092251308479, "total_flos": 6520288112271360, "step": 1700 }, { "loss": 9.2218359375, "learning_rate": 1.8000000000000001e-06, "epoch": 0.0072413917955030955, "total_flos": 6900105667338240, "step": 1800 }, { "loss": 9.15521484375, "learning_rate": 1.9000000000000002e-06, "epoch": 0.007643691339697712, "total_flos": 7272227232399360, "step": 1900 }, { "loss": 9.12775390625, "learning_rate": 2.0000000000000003e-06, "epoch": 0.008045990883892329, "total_flos": 7660521530081280, "step": 2000 }, { "loss": 9.10115234375, "learning_rate": 2.1000000000000002e-06, "epoch": 0.008448290428086945, "total_flos": 8051205886771200, "step": 2100 }, { "loss": 9.0774609375, "learning_rate": 2.2e-06, "epoch": 0.008850589972281562, "total_flos": 8441274139361280, "step": 2200 }, { "loss": 9.02068359375, "learning_rate": 2.3000000000000004e-06, "epoch": 0.009252889516476178, "total_flos": 8837179447173120, "step": 2300 }, { "loss": 8.96419921875, "learning_rate": 2.4000000000000003e-06, "epoch": 0.009655189060670795, "total_flos": 9220624580689920, "step": 2400 }, { "loss": 8.93837890625, "learning_rate": 2.5e-06, "epoch": 0.010057488604865411, "total_flos": 9606024251351040, "step": 2500 }, { "loss": 8.93515625, "learning_rate": 2.6e-06, "epoch": 0.010459788149060028, "total_flos": 9975607042621440, "step": 2600 }, { "loss": 8.854921875, "learning_rate": 2.7000000000000004e-06, "epoch": 0.010862087693254644, "total_flos": 10357543783342080, "step": 2700 }, { "loss": 8.85291015625, "learning_rate": 2.8000000000000003e-06, "epoch": 0.01126438723744926, "total_flos": 10759126809108480, "step": 2800 }, { "loss": 8.84685546875, "learning_rate": 2.9e-06, "epoch": 0.011666686781643877, "total_flos": 11143910375669760, "step": 2900 }, { "loss": 8.80154296875, "learning_rate": 3e-06, "epoch": 0.012068986325838494, "total_flos": 11528481492541440, "step": 3000 }, { "loss": 8.77578125, "learning_rate": 3.1000000000000004e-06, "epoch": 0.012471285870033108, "total_flos": 11920913247928320, "step": 3100 }, { "loss": 8.73296875, "learning_rate": 3.2000000000000003e-06, "epoch": 0.012873585414227725, "total_flos": 12289231963545600, "step": 3200 }, { "loss": 8.7233984375, "learning_rate": 3.3000000000000006e-06, "epoch": 0.013275884958422341, "total_flos": 12677494393774080, "step": 3300 }, { "loss": 8.6807421875, "learning_rate": 3.4000000000000005e-06, "epoch": 0.013678184502616958, "total_flos": 13084771071221760, "step": 3400 }, { "loss": 8.63228515625, "learning_rate": 3.5e-06, "epoch": 0.014080484046811574, "total_flos": 13476108710707200, "step": 3500 }, { "loss": 8.59236328125, "learning_rate": 3.6000000000000003e-06, "epoch": 0.014482783591006191, "total_flos": 13857344367452160, "step": 3600 }, { "loss": 8.5948046875, "learning_rate": 3.7e-06, "epoch": 0.014885083135200807, "total_flos": 14246679668613120, "step": 3700 }, { "loss": 8.587265625, "learning_rate": 3.8000000000000005e-06, "epoch": 0.015287382679395424, "total_flos": 14643190458040320, "step": 3800 }, { "loss": 8.5428125, "learning_rate": 3.900000000000001e-06, "epoch": 0.015689682223590042, "total_flos": 15023629428449280, "step": 3900 }, { "loss": 8.5461328125, "learning_rate": 4.000000000000001e-06, "epoch": 0.016091981767784657, "total_flos": 15404817284014080, "step": 4000 }, { "loss": 8.5058203125, "learning_rate": 4.1e-06, "epoch": 0.016494281311979272, "total_flos": 15776694531932160, "step": 4100 }, { "loss": 8.421875, "learning_rate": 4.2000000000000004e-06, "epoch": 0.01689658085617389, "total_flos": 16166810585702400, "step": 4200 }, { "loss": 8.4818359375, "learning_rate": 4.3e-06, "epoch": 0.017298880400368505, "total_flos": 16563358553825280, "step": 4300 }, { "loss": 8.431875, "learning_rate": 4.4e-06, "epoch": 0.017701179944563123, "total_flos": 16948646688399360, "step": 4400 }, { "loss": 8.3889453125, "learning_rate": 4.5e-06, "epoch": 0.018103479488757738, "total_flos": 17323131756257280, "step": 4500 }, { "loss": 8.3789453125, "learning_rate": 4.600000000000001e-06, "epoch": 0.018505779032952356, "total_flos": 17697250348400640, "step": 4600 }, { "loss": 8.3865625, "learning_rate": 4.7e-06, "epoch": 0.01890807857714697, "total_flos": 18067422687559680, "step": 4700 }, { "loss": 8.3894140625, "learning_rate": 4.800000000000001e-06, "epoch": 0.01931037812134159, "total_flos": 18453305681264640, "step": 4800 }, { "loss": 8.3816015625, "learning_rate": 4.9000000000000005e-06, "epoch": 0.019712677665536204, "total_flos": 18843400490065920, "step": 4900 }, { "loss": 8.33140625, "learning_rate": 5e-06, "epoch": 0.020114977209730822, "total_flos": 19232587076444160, "step": 5000 }, { "loss": 8.3114453125, "learning_rate": 5.1e-06, "epoch": 0.020517276753925437, "total_flos": 19619149909155840, "step": 5100 }, { "loss": 8.309765625, "learning_rate": 5.2e-06, "epoch": 0.020919576298120055, "total_flos": 20006886526402560, "step": 5200 }, { "loss": 8.27421875, "learning_rate": 5.300000000000001e-06, "epoch": 0.02132187584231467, "total_flos": 20409090967511040, "step": 5300 }, { "loss": 8.2626953125, "learning_rate": 5.400000000000001e-06, "epoch": 0.021724175386509288, "total_flos": 20797831409541120, "step": 5400 }, { "loss": 8.2404296875, "learning_rate": 5.500000000000001e-06, "epoch": 0.022126474930703903, "total_flos": 21173808936468480, "step": 5500 }, { "loss": 8.2509375, "learning_rate": 5.600000000000001e-06, "epoch": 0.02252877447489852, "total_flos": 21566952398315520, "step": 5600 }, { "loss": 8.241171875, "learning_rate": 5.7e-06, "epoch": 0.022931074019093136, "total_flos": 21939881272197120, "step": 5700 }, { "loss": 8.2608984375, "learning_rate": 5.8e-06, "epoch": 0.023333373563287754, "total_flos": 22334511881871360, "step": 5800 }, { "loss": 8.2644921875, "learning_rate": 5.9e-06, "epoch": 0.02373567310748237, "total_flos": 22722391902658560, "step": 5900 }, { "loss": 8.24015625, "learning_rate": 6e-06, "epoch": 0.024137972651676987, "total_flos": 23106277869281280, "step": 6000 }, { "loss": 8.206328125, "learning_rate": 6.1e-06, "epoch": 0.024540272195871602, "total_flos": 23485075665838080, "step": 6100 }, { "loss": 8.1804296875, "learning_rate": 6.200000000000001e-06, "epoch": 0.024942571740066217, "total_flos": 23858482551521280, "step": 6200 }, { "loss": 8.1660546875, "learning_rate": 6.300000000000001e-06, "epoch": 0.025344871284260835, "total_flos": 24253198141071360, "step": 6300 }, { "loss": 8.1920703125, "learning_rate": 6.4000000000000006e-06, "epoch": 0.02574717082845545, "total_flos": 24651323548139520, "step": 6400 }, { "loss": 8.1900390625, "learning_rate": 6.5000000000000004e-06, "epoch": 0.026149470372650068, "total_flos": 25034471252090880, "step": 6500 }, { "loss": 8.1867578125, "learning_rate": 6.600000000000001e-06, "epoch": 0.026551769916844683, "total_flos": 25420348934553600, "step": 6600 }, { "loss": 8.175390625, "learning_rate": 6.700000000000001e-06, "epoch": 0.0269540694610393, "total_flos": 25811097026150400, "step": 6700 }, { "loss": 8.129140625, "learning_rate": 6.800000000000001e-06, "epoch": 0.027356369005233916, "total_flos": 26199901203087360, "step": 6800 }, { "loss": 8.0966015625, "learning_rate": 6.9e-06, "epoch": 0.027758668549428534, "total_flos": 26583585342504960, "step": 6900 }, { "loss": 8.1080859375, "learning_rate": 7e-06, "epoch": 0.02816096809362315, "total_flos": 26977020922675200, "step": 7000 }, { "loss": 8.133203125, "learning_rate": 7.100000000000001e-06, "epoch": 0.028563267637817767, "total_flos": 27377706348503040, "step": 7100 }, { "loss": 8.08125, "learning_rate": 7.2000000000000005e-06, "epoch": 0.028965567182012382, "total_flos": 27742880808714240, "step": 7200 }, { "loss": 8.0592578125, "learning_rate": 7.3e-06, "epoch": 0.029367866726207, "total_flos": 28132927816335360, "step": 7300 }, { "loss": 8.0902734375, "learning_rate": 7.4e-06, "epoch": 0.029770166270401615, "total_flos": 28530766416322560, "step": 7400 }, { "loss": 8.0482421875, "learning_rate": 7.500000000000001e-06, "epoch": 0.030172465814596233, "total_flos": 28916861859717120, "step": 7500 }, { "loss": 8.052578125, "learning_rate": 7.600000000000001e-06, "epoch": 0.030574765358790848, "total_flos": 29297736351989760, "step": 7600 }, { "loss": 8.027265625, "learning_rate": 7.7e-06, "epoch": 0.030977064902985466, "total_flos": 29680698162462720, "step": 7700 }, { "loss": 8.041328125, "learning_rate": 7.800000000000002e-06, "epoch": 0.031379364447180084, "total_flos": 30064913426104320, "step": 7800 }, { "loss": 8.0309375, "learning_rate": 7.9e-06, "epoch": 0.0317816639913747, "total_flos": 30446223440240640, "step": 7900 }, { "loss": 8.05484375, "learning_rate": 8.000000000000001e-06, "epoch": 0.032183963535569314, "total_flos": 30842330575257600, "step": 8000 }, { "loss": 8.0678125, "learning_rate": 8.1e-06, "epoch": 0.03258626307976393, "total_flos": 31218908272558080, "step": 8100 }, { "loss": 8.036640625, "learning_rate": 8.2e-06, "epoch": 0.032988562623958544, "total_flos": 31600462603837440, "step": 8200 }, { "loss": 8.011640625, "learning_rate": 8.3e-06, "epoch": 0.033390862168153165, "total_flos": 31979148864307200, "step": 8300 }, { "loss": 8.007890625, "learning_rate": 8.400000000000001e-06, "epoch": 0.03379316171234778, "total_flos": 32356284242042880, "step": 8400 }, { "loss": 8.015546875, "learning_rate": 8.5e-06, "epoch": 0.034195461256542395, "total_flos": 32742714293698560, "step": 8500 }, { "loss": 8.02796875, "learning_rate": 8.6e-06, "epoch": 0.03459776080073701, "total_flos": 33126255029575680, "step": 8600 }, { "loss": 7.957734375, "learning_rate": 8.700000000000001e-06, "epoch": 0.03500006034493163, "total_flos": 33495535080038400, "step": 8700 }, { "loss": 7.989921875, "learning_rate": 8.8e-06, "epoch": 0.035402359889126246, "total_flos": 33882984890204160, "step": 8800 }, { "loss": 7.992421875, "learning_rate": 8.900000000000001e-06, "epoch": 0.03580465943332086, "total_flos": 34275421956833280, "step": 8900 }, { "loss": 7.960234375, "learning_rate": 9e-06, "epoch": 0.036206958977515476, "total_flos": 34660736647618560, "step": 9000 }, { "loss": 7.9653125, "learning_rate": 9.100000000000001e-06, "epoch": 0.0366092585217101, "total_flos": 35041526160015360, "step": 9100 }, { "loss": 7.97453125, "learning_rate": 9.200000000000002e-06, "epoch": 0.03701155806590471, "total_flos": 35423919667568640, "step": 9200 }, { "loss": 7.991484375, "learning_rate": 9.3e-06, "epoch": 0.03741385761009933, "total_flos": 35798128550830080, "step": 9300 }, { "loss": 7.961015625, "learning_rate": 9.4e-06, "epoch": 0.03781615715429394, "total_flos": 36197178114048000, "step": 9400 }, { "loss": 7.918359375, "learning_rate": 9.5e-06, "epoch": 0.038218456698488563, "total_flos": 36585398054338560, "step": 9500 }, { "loss": 7.9628125, "learning_rate": 9.600000000000001e-06, "epoch": 0.03862075624268318, "total_flos": 36988096440975360, "step": 9600 }, { "loss": 7.9021875, "learning_rate": 9.7e-06, "epoch": 0.03902305578687779, "total_flos": 37382918255370240, "step": 9700 }, { "loss": 7.886796875, "learning_rate": 9.800000000000001e-06, "epoch": 0.03942535533107241, "total_flos": 37789531027537920, "step": 9800 }, { "loss": 7.9453125, "learning_rate": 9.9e-06, "epoch": 0.03982765487526703, "total_flos": 38177437604536320, "step": 9900 }, { "loss": 7.926328125, "learning_rate": 1e-05, "epoch": 0.040229954419461644, "total_flos": 38570612933836800, "step": 10000 }, { "loss": 7.89109375, "learning_rate": 9.99918887460407e-06, "epoch": 0.04063225396365626, "total_flos": 38940015142871040, "step": 10100 }, { "loss": 7.86578125, "learning_rate": 9.998377749208141e-06, "epoch": 0.041034553507850874, "total_flos": 39318042809303040, "step": 10200 }, { "loss": 7.84171875, "learning_rate": 9.99756662381221e-06, "epoch": 0.04143685305204549, "total_flos": 39700898394931200, "step": 10300 }, { "loss": 7.8109375, "learning_rate": 9.99675549841628e-06, "epoch": 0.04183915259624011, "total_flos": 40076631604715520, "step": 10400 }, { "loss": 7.873125, "learning_rate": 9.995944373020348e-06, "epoch": 0.042241452140434725, "total_flos": 40478591728680960, "step": 10500 }, { "loss": 7.8625, "learning_rate": 9.995133247624418e-06, "epoch": 0.04264375168462934, "total_flos": 40860241662320640, "step": 10600 }, { "loss": 7.83359375, "learning_rate": 9.994322122228488e-06, "epoch": 0.043046051228823955, "total_flos": 41234455856824320, "step": 10700 }, { "loss": 7.85734375, "learning_rate": 9.993510996832556e-06, "epoch": 0.043448350773018576, "total_flos": 41620928398417920, "step": 10800 }, { "loss": 7.87375, "learning_rate": 9.992699871436624e-06, "epoch": 0.04385065031721319, "total_flos": 41991748709130240, "step": 10900 }, { "loss": 7.803828125, "learning_rate": 9.991888746040696e-06, "epoch": 0.044252949861407806, "total_flos": 42377509544263680, "step": 11000 }, { "loss": 7.798984375, "learning_rate": 9.991077620644765e-06, "epoch": 0.04465524940560242, "total_flos": 42755845262745600, "step": 11100 }, { "loss": 7.835859375, "learning_rate": 9.990266495248835e-06, "epoch": 0.04505754894979704, "total_flos": 43127026737930240, "step": 11200 }, { "loss": 7.791640625, "learning_rate": 9.989455369852903e-06, "epoch": 0.04545984849399166, "total_flos": 43488164654039040, "step": 11300 }, { "loss": 7.820703125, "learning_rate": 9.988644244456973e-06, "epoch": 0.04586214803818627, "total_flos": 43868640803143680, "step": 11400 }, { "loss": 7.811796875, "learning_rate": 9.987833119061043e-06, "epoch": 0.04626444758238089, "total_flos": 44249849903677440, "step": 11500 }, { "loss": 7.865625, "learning_rate": 9.987021993665111e-06, "epoch": 0.04666674712657551, "total_flos": 44647162690682880, "step": 11600 }, { "loss": 7.821171875, "learning_rate": 9.98621086826918e-06, "epoch": 0.04706904667077012, "total_flos": 45020373060403200, "step": 11700 }, { "loss": 7.824921875, "learning_rate": 9.985399742873251e-06, "epoch": 0.04747134621496474, "total_flos": 45424935693066240, "step": 11800 }, { "loss": 7.783671875, "learning_rate": 9.98458861747732e-06, "epoch": 0.04787364575915935, "total_flos": 45810664660746240, "step": 11900 }, { "loss": 7.768203125, "learning_rate": 9.98377749208139e-06, "epoch": 0.048275945303353975, "total_flos": 46189717396930560, "step": 12000 }, { "loss": 7.80640625, "learning_rate": 9.982966366685458e-06, "epoch": 0.04867824484754859, "total_flos": 46577698331320320, "step": 12100 }, { "loss": 7.734375, "learning_rate": 9.982155241289528e-06, "epoch": 0.049080544391743204, "total_flos": 46956522684088320, "step": 12200 }, { "loss": 7.780625, "learning_rate": 9.981344115893598e-06, "epoch": 0.04948284393593782, "total_flos": 47332903865425920, "step": 12300 }, { "loss": 7.756953125, "learning_rate": 9.980532990497667e-06, "epoch": 0.049885143480132434, "total_flos": 47733185636843520, "step": 12400 }, { "loss": 7.7184375, "learning_rate": 9.979721865101735e-06, "epoch": 0.050287443024327055, "total_flos": 48110119187374080, "step": 12500 }, { "loss": 7.70953125, "learning_rate": 9.978910739705807e-06, "epoch": 0.05068974256852167, "total_flos": 48498530332385280, "step": 12600 }, { "loss": 7.74328125, "learning_rate": 9.978099614309875e-06, "epoch": 0.051092042112716285, "total_flos": 48884556729630720, "step": 12700 }, { "loss": 7.710859375, "learning_rate": 9.977288488913945e-06, "epoch": 0.0514943416569109, "total_flos": 49260061555998720, "step": 12800 }, { "loss": 7.768125, "learning_rate": 9.976477363518013e-06, "epoch": 0.05189664120110552, "total_flos": 49638206069760000, "step": 12900 }, { "loss": 7.711640625, "learning_rate": 9.975666238122083e-06, "epoch": 0.052298940745300136, "total_flos": 50016839217807360, "step": 13000 }, { "loss": 7.704375, "learning_rate": 9.974855112726154e-06, "epoch": 0.05270124028949475, "total_flos": 50401192573747200, "step": 13100 }, { "loss": 7.684296875, "learning_rate": 9.974043987330222e-06, "epoch": 0.053103539833689366, "total_flos": 50781169466081280, "step": 13200 }, { "loss": 7.73796875, "learning_rate": 9.973232861934292e-06, "epoch": 0.05350583937788399, "total_flos": 51182306347499520, "step": 13300 }, { "loss": 7.68453125, "learning_rate": 9.972421736538362e-06, "epoch": 0.0539081389220786, "total_flos": 51557625280389120, "step": 13400 }, { "loss": 7.7153125, "learning_rate": 9.97161061114243e-06, "epoch": 0.05431043846627322, "total_flos": 51947698844221440, "step": 13500 }, { "loss": 7.743125, "learning_rate": 9.9707994857465e-06, "epoch": 0.05471273801046783, "total_flos": 52344241501102080, "step": 13600 }, { "loss": 7.70734375, "learning_rate": 9.969988360350569e-06, "epoch": 0.055115037554662454, "total_flos": 52721424680017920, "step": 13700 }, { "loss": 7.664140625, "learning_rate": 9.969177234954639e-06, "epoch": 0.05551733709885707, "total_flos": 53109798646333440, "step": 13800 }, { "loss": 7.695234375, "learning_rate": 9.968366109558709e-06, "epoch": 0.05591963664305168, "total_flos": 53494640636559360, "step": 13900 }, { "loss": 7.64703125, "learning_rate": 9.967554984162777e-06, "epoch": 0.0563219361872463, "total_flos": 53881654924861440, "step": 14000 }, { "loss": 7.7009375, "learning_rate": 9.966743858766847e-06, "epoch": 0.05672423573144092, "total_flos": 54260096868188160, "step": 14100 }, { "loss": 7.68828125, "learning_rate": 9.965932733370917e-06, "epoch": 0.057126535275635534, "total_flos": 54652629537177600, "step": 14200 }, { "loss": 7.6703125, "learning_rate": 9.965121607974986e-06, "epoch": 0.05752883481983015, "total_flos": 55020332148695040, "step": 14300 }, { "loss": 7.672109375, "learning_rate": 9.964310482579056e-06, "epoch": 0.057931134364024764, "total_flos": 55410719075819520, "step": 14400 }, { "loss": 7.664140625, "learning_rate": 9.963499357183124e-06, "epoch": 0.05833343390821938, "total_flos": 55795253013995520, "step": 14500 }, { "loss": 7.67359375, "learning_rate": 9.962688231787194e-06, "epoch": 0.058735733452414, "total_flos": 56174709404590080, "step": 14600 }, { "loss": 7.667421875, "learning_rate": 9.961877106391264e-06, "epoch": 0.059138032996608615, "total_flos": 56569525907742720, "step": 14700 }, { "loss": 7.642734375, "learning_rate": 9.961065980995332e-06, "epoch": 0.05954033254080323, "total_flos": 56948212168212480, "step": 14800 }, { "loss": 7.62921875, "learning_rate": 9.960254855599402e-06, "epoch": 0.059942632084997845, "total_flos": 57333999559557120, "step": 14900 }, { "loss": 7.612578125, "learning_rate": 9.959443730203472e-06, "epoch": 0.060344931629192466, "total_flos": 57725820522086400, "step": 15000 }, { "loss": 7.622265625, "learning_rate": 9.95863260480754e-06, "epoch": 0.06074723117338708, "total_flos": 58122315377786880, "step": 15100 }, { "loss": 7.61484375, "learning_rate": 9.957821479411611e-06, "epoch": 0.061149530717581696, "total_flos": 58512038399631360, "step": 15200 }, { "loss": 7.625234375, "learning_rate": 9.957010354015681e-06, "epoch": 0.06155183026177631, "total_flos": 58901511793090560, "step": 15300 }, { "loss": 7.6009375, "learning_rate": 9.95619922861975e-06, "epoch": 0.06195412980597093, "total_flos": 59302032570408960, "step": 15400 }, { "loss": 7.6278125, "learning_rate": 9.95538810322382e-06, "epoch": 0.06235642935016555, "total_flos": 59676129917583360, "step": 15500 }, { "loss": 7.620390625, "learning_rate": 9.954576977827888e-06, "epoch": 0.06275872889436017, "total_flos": 60051448850472960, "step": 15600 }, { "loss": 7.58875, "learning_rate": 9.953765852431958e-06, "epoch": 0.06316102843855478, "total_flos": 60439387294924800, "step": 15700 }, { "loss": 7.602109375, "learning_rate": 9.952954727036028e-06, "epoch": 0.0635633279827494, "total_flos": 60803903161098240, "step": 15800 }, { "loss": 7.59734375, "learning_rate": 9.952143601640096e-06, "epoch": 0.06396562752694401, "total_flos": 61193960791203840, "step": 15900 }, { "loss": 7.581875, "learning_rate": 9.951332476244166e-06, "epoch": 0.06436792707113863, "total_flos": 61577671486832640, "step": 16000 }, { "loss": 7.6121875, "learning_rate": 9.950521350848236e-06, "epoch": 0.06477022661533324, "total_flos": 61978144462970880, "step": 16100 }, { "loss": 7.54375, "learning_rate": 9.949710225452305e-06, "epoch": 0.06517252615952786, "total_flos": 62373401799229440, "step": 16200 }, { "loss": 7.54296875, "learning_rate": 9.948899100056375e-06, "epoch": 0.06557482570372247, "total_flos": 62751572869201920, "step": 16300 }, { "loss": 7.57140625, "learning_rate": 9.948087974660443e-06, "epoch": 0.06597712524791709, "total_flos": 63128070897868800, "step": 16400 }, { "loss": 7.56234375, "learning_rate": 9.947276849264513e-06, "epoch": 0.06637942479211172, "total_flos": 63526164437483520, "step": 16500 }, { "loss": 7.60703125, "learning_rate": 9.946465723868583e-06, "epoch": 0.06678172433630633, "total_flos": 63908127734415360, "step": 16600 }, { "loss": 7.551875, "learning_rate": 9.945654598472651e-06, "epoch": 0.06718402388050095, "total_flos": 64274704362577920, "step": 16700 }, { "loss": 7.5334375, "learning_rate": 9.944843473076721e-06, "epoch": 0.06758632342469556, "total_flos": 64652971034910720, "step": 16800 }, { "loss": 7.53953125, "learning_rate": 9.944032347680791e-06, "epoch": 0.06798862296889017, "total_flos": 65037722734018560, "step": 16900 }, { "loss": 7.4859375, "learning_rate": 9.94322122228486e-06, "epoch": 0.06839092251308479, "total_flos": 65414666907033600, "step": 17000 }, { "loss": 7.57390625, "learning_rate": 9.94241009688893e-06, "epoch": 0.0687932220572794, "total_flos": 65804177479188480, "step": 17100 }, { "loss": 7.52796875, "learning_rate": 9.941598971492998e-06, "epoch": 0.06919552160147402, "total_flos": 66188174981898240, "step": 17200 }, { "loss": 7.563125, "learning_rate": 9.940787846097068e-06, "epoch": 0.06959782114566865, "total_flos": 66582141686292480, "step": 17300 }, { "loss": 7.5603125, "learning_rate": 9.939976720701138e-06, "epoch": 0.07000012068986326, "total_flos": 66955405168435200, "step": 17400 }, { "loss": 7.51359375, "learning_rate": 9.939165595305207e-06, "epoch": 0.07040242023405788, "total_flos": 67332912333127680, "step": 17500 }, { "loss": 7.52109375, "learning_rate": 9.938354469909277e-06, "epoch": 0.07080471977825249, "total_flos": 67707721386762240, "step": 17600 }, { "loss": 7.44921875, "learning_rate": 9.937543344513347e-06, "epoch": 0.07120701932244711, "total_flos": 68081138894929920, "step": 17700 }, { "loss": 7.4775, "learning_rate": 9.936732219117415e-06, "epoch": 0.07160931886664172, "total_flos": 68459777354219520, "step": 17800 }, { "loss": 7.51078125, "learning_rate": 9.935921093721485e-06, "epoch": 0.07201161841083634, "total_flos": 68832860254126080, "step": 17900 }, { "loss": 7.47203125, "learning_rate": 9.935109968325553e-06, "epoch": 0.07241391795503095, "total_flos": 69221690987274240, "step": 18000 }, { "loss": 7.543125, "learning_rate": 9.934298842929623e-06, "epoch": 0.07281621749922558, "total_flos": 69600775590912000, "step": 18100 }, { "loss": 7.52859375, "learning_rate": 9.933487717533694e-06, "epoch": 0.0732185170434202, "total_flos": 69985426376417280, "step": 18200 }, { "loss": 7.551875, "learning_rate": 9.932676592137762e-06, "epoch": 0.07362081658761481, "total_flos": 70352093295697920, "step": 18300 }, { "loss": 7.50328125, "learning_rate": 9.931865466741832e-06, "epoch": 0.07402311613180942, "total_flos": 70734311532257280, "step": 18400 }, { "loss": 7.455625, "learning_rate": 9.931054341345902e-06, "epoch": 0.07442541567600404, "total_flos": 71120391041925120, "step": 18500 }, { "loss": 7.51875, "learning_rate": 9.93024321594997e-06, "epoch": 0.07482771522019865, "total_flos": 71509651985694720, "step": 18600 }, { "loss": 7.51359375, "learning_rate": 9.92943209055404e-06, "epoch": 0.07523001476439327, "total_flos": 71894754226790400, "step": 18700 }, { "loss": 7.46921875, "learning_rate": 9.928620965158109e-06, "epoch": 0.07563231430858788, "total_flos": 72278024089313280, "step": 18800 }, { "loss": 7.44078125, "learning_rate": 9.927809839762179e-06, "epoch": 0.0760346138527825, "total_flos": 72668798737121280, "step": 18900 }, { "loss": 7.47796875, "learning_rate": 9.926998714366249e-06, "epoch": 0.07643691339697713, "total_flos": 73043692770631680, "step": 19000 }, { "loss": 7.48078125, "learning_rate": 9.926187588970317e-06, "epoch": 0.07683921294117174, "total_flos": 73429182732410880, "step": 19100 }, { "loss": 7.4975, "learning_rate": 9.925376463574387e-06, "epoch": 0.07724151248536636, "total_flos": 73817487652577280, "step": 19200 }, { "loss": 7.4734375, "learning_rate": 9.924565338178457e-06, "epoch": 0.07764381202956097, "total_flos": 74207008847216640, "step": 19300 }, { "loss": 7.420625, "learning_rate": 9.923754212782526e-06, "epoch": 0.07804611157375559, "total_flos": 74577611396997120, "step": 19400 }, { "loss": 7.40171875, "learning_rate": 9.922943087386596e-06, "epoch": 0.0784484111179502, "total_flos": 74958565557903360, "step": 19500 }, { "loss": 7.4609375, "learning_rate": 9.922131961990666e-06, "epoch": 0.07885071066214482, "total_flos": 75342509948190720, "step": 19600 }, { "loss": 7.42234375, "learning_rate": 9.921320836594734e-06, "epoch": 0.07925301020633943, "total_flos": 75722008828723200, "step": 19700 }, { "loss": 7.46140625, "learning_rate": 9.920509711198804e-06, "epoch": 0.07965530975053406, "total_flos": 76088107445084160, "step": 19800 }, { "loss": 7.455625, "learning_rate": 9.919698585802872e-06, "epoch": 0.08005760929472867, "total_flos": 76479779692830720, "step": 19900 }, { "loss": 7.45453125, "learning_rate": 9.918887460406942e-06, "epoch": 0.08045990883892329, "total_flos": 76853765503918080, "step": 20000 }, { "loss": 7.4003125, "learning_rate": 9.918076335011012e-06, "epoch": 0.0808622083831179, "total_flos": 77219853497794560, "step": 20100 }, { "loss": 7.44890625, "learning_rate": 9.91726520961508e-06, "epoch": 0.08126450792731252, "total_flos": 77600913883545600, "step": 20200 }, { "loss": 7.43046875, "learning_rate": 9.916454084219151e-06, "epoch": 0.08166680747150713, "total_flos": 77988586765885440, "step": 20300 }, { "loss": 7.44390625, "learning_rate": 9.915642958823221e-06, "epoch": 0.08206910701570175, "total_flos": 78385124111523840, "step": 20400 }, { "loss": 7.3775, "learning_rate": 9.91483183342729e-06, "epoch": 0.08247140655989636, "total_flos": 78774916179517440, "step": 20500 }, { "loss": 7.42515625, "learning_rate": 9.91402070803136e-06, "epoch": 0.08287370610409098, "total_flos": 79147489200168960, "step": 20600 }, { "loss": 7.454375, "learning_rate": 9.913209582635428e-06, "epoch": 0.0832760056482856, "total_flos": 79546023572889600, "step": 20700 }, { "loss": 7.4153125, "learning_rate": 9.912398457239498e-06, "epoch": 0.08367830519248022, "total_flos": 79922856209817600, "step": 20800 }, { "loss": 7.39953125, "learning_rate": 9.911587331843568e-06, "epoch": 0.08408060473667484, "total_flos": 80303587298549760, "step": 20900 }, { "loss": 7.42390625, "learning_rate": 9.910776206447636e-06, "epoch": 0.08448290428086945, "total_flos": 80684642373058560, "step": 21000 }, { "loss": 7.4471875, "learning_rate": 9.909965081051706e-06, "epoch": 0.08488520382506407, "total_flos": 81065352216821760, "step": 21100 }, { "loss": 7.419375, "learning_rate": 9.909153955655776e-06, "epoch": 0.08528750336925868, "total_flos": 81455824123822080, "step": 21200 }, { "loss": 7.3990625, "learning_rate": 9.908342830259845e-06, "epoch": 0.0856898029134533, "total_flos": 81826309826273280, "step": 21300 }, { "loss": 7.42984375, "learning_rate": 9.907531704863915e-06, "epoch": 0.08609210245764791, "total_flos": 82199849493012480, "step": 21400 }, { "loss": 7.45984375, "learning_rate": 9.906720579467983e-06, "epoch": 0.08649440200184254, "total_flos": 82593672793866240, "step": 21500 }, { "loss": 7.4053125, "learning_rate": 9.905909454072053e-06, "epoch": 0.08689670154603715, "total_flos": 82978190798315520, "step": 21600 }, { "loss": 7.3834375, "learning_rate": 9.905098328676123e-06, "epoch": 0.08729900109023177, "total_flos": 83351310876917760, "step": 21700 }, { "loss": 7.41765625, "learning_rate": 9.904287203280191e-06, "epoch": 0.08770130063442638, "total_flos": 83739031560437760, "step": 21800 }, { "loss": 7.40453125, "learning_rate": 9.903476077884261e-06, "epoch": 0.088103600178621, "total_flos": 84137879296450560, "step": 21900 }, { "loss": 7.4075, "learning_rate": 9.902664952488331e-06, "epoch": 0.08850589972281561, "total_flos": 84518934370959360, "step": 22000 }, { "loss": 7.345, "learning_rate": 9.9018538270924e-06, "epoch": 0.08890819926701023, "total_flos": 84888379069931520, "step": 22100 }, { "loss": 7.3840625, "learning_rate": 9.90104270169647e-06, "epoch": 0.08931049881120484, "total_flos": 85271787024752640, "step": 22200 }, { "loss": 7.39375, "learning_rate": 9.900231576300538e-06, "epoch": 0.08971279835539947, "total_flos": 85646941309132800, "step": 22300 }, { "loss": 7.37875, "learning_rate": 9.899420450904608e-06, "epoch": 0.09011509789959408, "total_flos": 86029759716065280, "step": 22400 }, { "loss": 7.37671875, "learning_rate": 9.898609325508678e-06, "epoch": 0.0905173974437887, "total_flos": 86404117314109440, "step": 22500 }, { "loss": 7.3678125, "learning_rate": 9.897798200112747e-06, "epoch": 0.09091969698798331, "total_flos": 86794169632972800, "step": 22600 }, { "loss": 7.40109375, "learning_rate": 9.896987074716817e-06, "epoch": 0.09132199653217793, "total_flos": 87179707395932160, "step": 22700 }, { "loss": 7.38484375, "learning_rate": 9.896175949320887e-06, "epoch": 0.09172429607637254, "total_flos": 87562642650193920, "step": 22800 }, { "loss": 7.408125, "learning_rate": 9.895364823924955e-06, "epoch": 0.09212659562056716, "total_flos": 87955037226885120, "step": 22900 }, { "loss": 7.399375, "learning_rate": 9.894553698529025e-06, "epoch": 0.09252889516476177, "total_flos": 88347670809477120, "step": 23000 }, { "loss": 7.33796875, "learning_rate": 9.893742573133093e-06, "epoch": 0.09293119470895639, "total_flos": 88737500056166400, "step": 23100 }, { "loss": 7.34609375, "learning_rate": 9.892931447737163e-06, "epoch": 0.09333349425315102, "total_flos": 89125884644966400, "step": 23200 }, { "loss": 7.35546875, "learning_rate": 9.892120322341234e-06, "epoch": 0.09373579379734563, "total_flos": 89530882799493120, "step": 23300 }, { "loss": 7.349375, "learning_rate": 9.891309196945302e-06, "epoch": 0.09413809334154025, "total_flos": 89904863299338240, "step": 23400 }, { "loss": 7.395625, "learning_rate": 9.890498071549372e-06, "epoch": 0.09454039288573486, "total_flos": 90283294620180480, "step": 23500 }, { "loss": 7.32953125, "learning_rate": 9.889686946153442e-06, "epoch": 0.09494269242992948, "total_flos": 90644533449891840, "step": 23600 }, { "loss": 7.4021875, "learning_rate": 9.88887582075751e-06, "epoch": 0.09534499197412409, "total_flos": 91044799287582720, "step": 23700 }, { "loss": 7.38765625, "learning_rate": 9.88806469536158e-06, "epoch": 0.0957472915183187, "total_flos": 91433805291724800, "step": 23800 }, { "loss": 7.3321875, "learning_rate": 9.887253569965649e-06, "epoch": 0.09614959106251332, "total_flos": 91820463726796800, "step": 23900 }, { "loss": 7.394375, "learning_rate": 9.886442444569719e-06, "epoch": 0.09655189060670795, "total_flos": 92221313801134080, "step": 24000 }, { "loss": 7.2884375, "learning_rate": 9.885631319173789e-06, "epoch": 0.09695419015090256, "total_flos": 92598932501913600, "step": 24100 }, { "loss": 7.36, "learning_rate": 9.884820193777857e-06, "epoch": 0.09735648969509718, "total_flos": 92973550350827520, "step": 24200 }, { "loss": 7.311875, "learning_rate": 9.884009068381927e-06, "epoch": 0.0977587892392918, "total_flos": 93352645576949760, "step": 24300 }, { "loss": 7.329375, "learning_rate": 9.883197942985997e-06, "epoch": 0.09816108878348641, "total_flos": 93730593574748160, "step": 24400 }, { "loss": 7.30859375, "learning_rate": 9.882386817590066e-06, "epoch": 0.09856338832768102, "total_flos": 94095003216076800, "step": 24500 }, { "loss": 7.33078125, "learning_rate": 9.881575692194136e-06, "epoch": 0.09896568787187564, "total_flos": 94479728358973440, "step": 24600 }, { "loss": 7.3446875, "learning_rate": 9.880764566798206e-06, "epoch": 0.09936798741607025, "total_flos": 94867247215288320, "step": 24700 }, { "loss": 7.3109375, "learning_rate": 9.879953441402274e-06, "epoch": 0.09977028696026487, "total_flos": 95259875486638080, "step": 24800 }, { "loss": 7.34515625, "learning_rate": 9.879142316006344e-06, "epoch": 0.1001725865044595, "total_flos": 95651531800657920, "step": 24900 }, { "loss": 7.34765625, "learning_rate": 9.878331190610412e-06, "epoch": 0.10057488604865411, "total_flos": 96040994571632640, "step": 25000 }, { "loss": 7.30484375, "learning_rate": 9.877520065214482e-06, "epoch": 0.10097718559284873, "total_flos": 96417997168312320, "step": 25100 }, { "loss": 7.3590625, "learning_rate": 9.876708939818552e-06, "epoch": 0.10137948513704334, "total_flos": 96802971939594240, "step": 25200 }, { "loss": 7.36375, "learning_rate": 9.87589781442262e-06, "epoch": 0.10178178468123796, "total_flos": 97176729367265280, "step": 25300 }, { "loss": 7.2840625, "learning_rate": 9.875086689026691e-06, "epoch": 0.10218408422543257, "total_flos": 97564322580971520, "step": 25400 }, { "loss": 7.314375, "learning_rate": 9.874275563630761e-06, "epoch": 0.10258638376962718, "total_flos": 97934489608888320, "step": 25500 }, { "loss": 7.30359375, "learning_rate": 9.87346443823483e-06, "epoch": 0.1029886833138218, "total_flos": 98315751821844480, "step": 25600 }, { "loss": 7.3071875, "learning_rate": 9.8726533128389e-06, "epoch": 0.10339098285801643, "total_flos": 98696849386291200, "step": 25700 }, { "loss": 7.31453125, "learning_rate": 9.871842187442968e-06, "epoch": 0.10379328240221104, "total_flos": 99077840725893120, "step": 25800 }, { "loss": 7.30234375, "learning_rate": 9.871031062047038e-06, "epoch": 0.10419558194640566, "total_flos": 99467122914631680, "step": 25900 }, { "loss": 7.299375, "learning_rate": 9.870219936651108e-06, "epoch": 0.10459788149060027, "total_flos": 99855284431257600, "step": 26000 }, { "loss": 7.3221875, "learning_rate": 9.869408811255176e-06, "epoch": 0.10500018103479489, "total_flos": 100255183793233920, "step": 26100 }, { "loss": 7.308125, "learning_rate": 9.868597685859246e-06, "epoch": 0.1054024805789895, "total_flos": 100641773182156800, "step": 26200 }, { "loss": 7.26640625, "learning_rate": 9.867786560463316e-06, "epoch": 0.10580478012318412, "total_flos": 101026445212631040, "step": 26300 }, { "loss": 7.2678125, "learning_rate": 9.866975435067385e-06, "epoch": 0.10620707966737873, "total_flos": 101414229631057920, "step": 26400 }, { "loss": 7.2965625, "learning_rate": 9.866164309671455e-06, "epoch": 0.10660937921157336, "total_flos": 101791577458483200, "step": 26500 }, { "loss": 7.3140625, "learning_rate": 9.865353184275523e-06, "epoch": 0.10701167875576797, "total_flos": 102168436651622400, "step": 26600 }, { "loss": 7.3359375, "learning_rate": 9.864542058879593e-06, "epoch": 0.10741397829996259, "total_flos": 102549799778181120, "step": 26700 }, { "loss": 7.283125, "learning_rate": 9.863730933483663e-06, "epoch": 0.1078162778441572, "total_flos": 102927354744053760, "step": 26800 }, { "loss": 7.28046875, "learning_rate": 9.862919808087731e-06, "epoch": 0.10821857738835182, "total_flos": 103312531342540800, "step": 26900 }, { "loss": 7.23046875, "learning_rate": 9.862108682691801e-06, "epoch": 0.10862087693254643, "total_flos": 103705515467120640, "step": 27000 }, { "loss": 7.26546875, "learning_rate": 9.861297557295871e-06, "epoch": 0.10902317647674105, "total_flos": 104077939772989440, "step": 27100 }, { "loss": 7.26859375, "learning_rate": 9.86048643189994e-06, "epoch": 0.10942547602093566, "total_flos": 104479368772730880, "step": 27200 }, { "loss": 7.27578125, "learning_rate": 9.85967530650401e-06, "epoch": 0.10982777556513028, "total_flos": 104858554289971200, "step": 27300 }, { "loss": 7.26890625, "learning_rate": 9.858864181108078e-06, "epoch": 0.11023007510932491, "total_flos": 105236635068825600, "step": 27400 }, { "loss": 7.23625, "learning_rate": 9.858053055712148e-06, "epoch": 0.11063237465351952, "total_flos": 105633793829806080, "step": 27500 }, { "loss": 7.256875, "learning_rate": 9.857241930316218e-06, "epoch": 0.11103467419771414, "total_flos": 106004396379586560, "step": 27600 }, { "loss": 7.29515625, "learning_rate": 9.856430804920287e-06, "epoch": 0.11143697374190875, "total_flos": 106373671118807040, "step": 27700 }, { "loss": 7.24625, "learning_rate": 9.855619679524357e-06, "epoch": 0.11183927328610337, "total_flos": 106768498244444160, "step": 27800 }, { "loss": 7.27109375, "learning_rate": 9.854808554128427e-06, "epoch": 0.11224157283029798, "total_flos": 107159299448463360, "step": 27900 }, { "loss": 7.29265625, "learning_rate": 9.853997428732495e-06, "epoch": 0.1126438723744926, "total_flos": 107545203687137280, "step": 28000 }, { "loss": 7.235625, "learning_rate": 9.853186303336565e-06, "epoch": 0.11304617191868721, "total_flos": 107920012740771840, "step": 28100 }, { "loss": 7.295625, "learning_rate": 9.852375177940633e-06, "epoch": 0.11344847146288184, "total_flos": 108306873003048960, "step": 28200 }, { "loss": 7.19234375, "learning_rate": 9.851564052544703e-06, "epoch": 0.11385077100707645, "total_flos": 108680290511216640, "step": 28300 }, { "loss": 7.23109375, "learning_rate": 9.850752927148774e-06, "epoch": 0.11425307055127107, "total_flos": 109074538711449600, "step": 28400 }, { "loss": 7.29703125, "learning_rate": 9.849941801752842e-06, "epoch": 0.11465537009546568, "total_flos": 109464994684723200, "step": 28500 }, { "loss": 7.22515625, "learning_rate": 9.849130676356912e-06, "epoch": 0.1150576696396603, "total_flos": 109848216746065920, "step": 28600 }, { "loss": 7.2778125, "learning_rate": 9.848319550960982e-06, "epoch": 0.11545996918385491, "total_flos": 110252965272207360, "step": 28700 }, { "loss": 7.1975, "learning_rate": 9.84750842556505e-06, "epoch": 0.11586226872804953, "total_flos": 110629792597893120, "step": 28800 }, { "loss": 7.24515625, "learning_rate": 9.84669730016912e-06, "epoch": 0.11626456827224414, "total_flos": 111016105802219520, "step": 28900 }, { "loss": 7.2371875, "learning_rate": 9.84588617477319e-06, "epoch": 0.11666686781643876, "total_flos": 111414565817548800, "step": 29000 }, { "loss": 7.266875, "learning_rate": 9.845075049377259e-06, "epoch": 0.11706916736063339, "total_flos": 111809031778713600, "step": 29100 }, { "loss": 7.23875, "learning_rate": 9.844263923981329e-06, "epoch": 0.117471466904828, "total_flos": 112201745029939200, "step": 29200 }, { "loss": 7.19140625, "learning_rate": 9.843452798585397e-06, "epoch": 0.11787376644902262, "total_flos": 112568326969344000, "step": 29300 }, { "loss": 7.20671875, "learning_rate": 9.842641673189469e-06, "epoch": 0.11827606599321723, "total_flos": 112948962455715840, "step": 29400 }, { "loss": 7.2121875, "learning_rate": 9.841830547793537e-06, "epoch": 0.11867836553741185, "total_flos": 113349515100487680, "step": 29500 }, { "loss": 7.22765625, "learning_rate": 9.841019422397606e-06, "epoch": 0.11908066508160646, "total_flos": 113742196484259840, "step": 29600 }, { "loss": 7.24625, "learning_rate": 9.840208297001676e-06, "epoch": 0.11948296462580107, "total_flos": 114137129834741760, "step": 29700 }, { "loss": 7.19625, "learning_rate": 9.839397171605746e-06, "epoch": 0.11988526416999569, "total_flos": 114524760227143680, "step": 29800 }, { "loss": 7.20109375, "learning_rate": 9.838586046209814e-06, "epoch": 0.12028756371419032, "total_flos": 114902007140966400, "step": 29900 }, { "loss": 7.2525, "learning_rate": 9.837774920813884e-06, "epoch": 0.12068986325838493, "total_flos": 115280199455907840, "step": 30000 }, { "loss": 7.19359375, "learning_rate": 9.836963795417952e-06, "epoch": 0.12109216280257955, "total_flos": 115646744216616960, "step": 30100 }, { "loss": 7.16375, "learning_rate": 9.836152670022024e-06, "epoch": 0.12149446234677416, "total_flos": 116023221000314880, "step": 30200 }, { "loss": 7.23484375, "learning_rate": 9.835341544626092e-06, "epoch": 0.12189676189096878, "total_flos": 116398970143825920, "step": 30300 }, { "loss": 7.2353125, "learning_rate": 9.83453041923016e-06, "epoch": 0.12229906143516339, "total_flos": 116806177775124480, "step": 30400 }, { "loss": 7.24171875, "learning_rate": 9.833719293834231e-06, "epoch": 0.122701360979358, "total_flos": 117200266638090240, "step": 30500 }, { "loss": 7.17109375, "learning_rate": 9.832908168438301e-06, "epoch": 0.12310366052355262, "total_flos": 117574050621972480, "step": 30600 }, { "loss": 7.19109375, "learning_rate": 9.832097043042371e-06, "epoch": 0.12350596006774724, "total_flos": 117964097629593600, "step": 30700 }, { "loss": 7.2234375, "learning_rate": 9.83128591764644e-06, "epoch": 0.12390825961194186, "total_flos": 118350941958144000, "step": 30800 }, { "loss": 7.17875, "learning_rate": 9.830474792250508e-06, "epoch": 0.12431055915613648, "total_flos": 118728799664824320, "step": 30900 }, { "loss": 7.178125, "learning_rate": 9.82966366685458e-06, "epoch": 0.1247128587003311, "total_flos": 119117593219276800, "step": 31000 }, { "loss": 7.17296875, "learning_rate": 9.828852541458648e-06, "epoch": 0.1251151582445257, "total_flos": 119503168160931840, "step": 31100 }, { "loss": 7.1753125, "learning_rate": 9.828041416062716e-06, "epoch": 0.12551745778872034, "total_flos": 119892689355571200, "step": 31200 }, { "loss": 7.19859375, "learning_rate": 9.827230290666786e-06, "epoch": 0.12591975733291494, "total_flos": 120286581702574080, "step": 31300 }, { "loss": 7.211875, "learning_rate": 9.826419165270856e-06, "epoch": 0.12632205687710957, "total_flos": 120693258209648640, "step": 31400 }, { "loss": 7.19703125, "learning_rate": 9.825608039874926e-06, "epoch": 0.12672435642130417, "total_flos": 121083124635033600, "step": 31500 }, { "loss": 7.143125, "learning_rate": 9.824796914478995e-06, "epoch": 0.1271266559654988, "total_flos": 121453902455808000, "step": 31600 }, { "loss": 7.18421875, "learning_rate": 9.823985789083063e-06, "epoch": 0.1275289555096934, "total_flos": 121845702173368320, "step": 31700 }, { "loss": 7.20140625, "learning_rate": 9.823174663687135e-06, "epoch": 0.12793125505388803, "total_flos": 122229412868997120, "step": 31800 }, { "loss": 7.13390625, "learning_rate": 9.822363538291203e-06, "epoch": 0.12833355459808263, "total_flos": 122614701003571200, "step": 31900 }, { "loss": 7.149375, "learning_rate": 9.821552412895271e-06, "epoch": 0.12873585414227726, "total_flos": 122987013773352960, "step": 32000 }, { "loss": 7.1740625, "learning_rate": 9.820741287499341e-06, "epoch": 0.12913815368647188, "total_flos": 123371738916249600, "step": 32100 }, { "loss": 7.160625, "learning_rate": 9.819930162103411e-06, "epoch": 0.12954045323066649, "total_flos": 123761600030392320, "step": 32200 }, { "loss": 7.18765625, "learning_rate": 9.819119036707481e-06, "epoch": 0.12994275277486111, "total_flos": 124137481954959360, "step": 32300 }, { "loss": 7.15609375, "learning_rate": 9.81830791131155e-06, "epoch": 0.13034505231905572, "total_flos": 124519795793879040, "step": 32400 }, { "loss": 7.19828125, "learning_rate": 9.817496785915618e-06, "epoch": 0.13074735186325034, "total_flos": 124905009571061760, "step": 32500 }, { "loss": 7.17890625, "learning_rate": 9.81668566051969e-06, "epoch": 0.13114965140744494, "total_flos": 125283711765258240, "step": 32600 }, { "loss": 7.15921875, "learning_rate": 9.815874535123758e-06, "epoch": 0.13155195095163957, "total_flos": 125669063634739200, "step": 32700 }, { "loss": 7.22890625, "learning_rate": 9.815063409727828e-06, "epoch": 0.13195425049583417, "total_flos": 126054739489996800, "step": 32800 }, { "loss": 7.11015625, "learning_rate": 9.814252284331897e-06, "epoch": 0.1323565500400288, "total_flos": 126440723397304320, "step": 32900 }, { "loss": 7.11375, "learning_rate": 9.813441158935967e-06, "epoch": 0.13275884958422343, "total_flos": 126843522697543680, "step": 33000 }, { "loss": 7.14390625, "learning_rate": 9.812630033540037e-06, "epoch": 0.13316114912841803, "total_flos": 127227504266526720, "step": 33100 }, { "loss": 7.10625, "learning_rate": 9.811818908144105e-06, "epoch": 0.13356344867261266, "total_flos": 127602536392335360, "step": 33200 }, { "loss": 7.130625, "learning_rate": 9.811007782748173e-06, "epoch": 0.13396574821680726, "total_flos": 127994851300392960, "step": 33300 }, { "loss": 7.1325, "learning_rate": 9.810196657352245e-06, "epoch": 0.1343680477610019, "total_flos": 128385264783728640, "step": 33400 }, { "loss": 7.10703125, "learning_rate": 9.809385531956314e-06, "epoch": 0.1347703473051965, "total_flos": 128761486627799040, "step": 33500 }, { "loss": 7.11765625, "learning_rate": 9.808574406560384e-06, "epoch": 0.13517264684939112, "total_flos": 129162639442944000, "step": 33600 }, { "loss": 7.16875, "learning_rate": 9.807763281164452e-06, "epoch": 0.13557494639358575, "total_flos": 129562336977715200, "step": 33700 }, { "loss": 7.14296875, "learning_rate": 9.806952155768522e-06, "epoch": 0.13597724593778035, "total_flos": 129947593244835840, "step": 33800 }, { "loss": 7.15, "learning_rate": 9.806141030372592e-06, "epoch": 0.13637954548197498, "total_flos": 130320984196792320, "step": 33900 }, { "loss": 7.10125, "learning_rate": 9.80532990497666e-06, "epoch": 0.13678184502616958, "total_flos": 130698284223037440, "step": 34000 }, { "loss": 7.1159375, "learning_rate": 9.80451877958073e-06, "epoch": 0.1371841445703642, "total_flos": 131082563221585920, "step": 34100 }, { "loss": 7.153125, "learning_rate": 9.8037076541848e-06, "epoch": 0.1375864441145588, "total_flos": 131454998149939200, "step": 34200 }, { "loss": 7.1196875, "learning_rate": 9.802896528788869e-06, "epoch": 0.13798874365875344, "total_flos": 131855120584089600, "step": 34300 }, { "loss": 7.11375, "learning_rate": 9.802085403392939e-06, "epoch": 0.13839104320294804, "total_flos": 132245125101772800, "step": 34400 }, { "loss": 7.1059375, "learning_rate": 9.801274277997009e-06, "epoch": 0.13879334274714267, "total_flos": 132617225421864960, "step": 34500 }, { "loss": 7.07625, "learning_rate": 9.800463152601077e-06, "epoch": 0.1391956422913373, "total_flos": 133012663340359680, "step": 34600 }, { "loss": 7.1559375, "learning_rate": 9.799652027205147e-06, "epoch": 0.1395979418355319, "total_flos": 133410889661030400, "step": 34700 }, { "loss": 7.093125, "learning_rate": 9.798840901809216e-06, "epoch": 0.14000024137972653, "total_flos": 133789852106096640, "step": 34800 }, { "loss": 7.1225, "learning_rate": 9.798029776413286e-06, "epoch": 0.14040254092392113, "total_flos": 134176085641789440, "step": 34900 }, { "loss": 7.1196875, "learning_rate": 9.797218651017356e-06, "epoch": 0.14080484046811576, "total_flos": 134555095888035840, "step": 35000 }, { "loss": 7.13375, "learning_rate": 9.796407525621424e-06, "epoch": 0.14120714001231036, "total_flos": 134949992059822080, "step": 35100 }, { "loss": 7.1346875, "learning_rate": 9.795596400225494e-06, "epoch": 0.14160943955650498, "total_flos": 135353763317391360, "step": 35200 }, { "loss": 7.1465625, "learning_rate": 9.794785274829564e-06, "epoch": 0.14201173910069959, "total_flos": 135726081398415360, "step": 35300 }, { "loss": 7.1125, "learning_rate": 9.793974149433632e-06, "epoch": 0.14241403864489421, "total_flos": 136104815460065280, "step": 35400 }, { "loss": 7.11, "learning_rate": 9.793163024037703e-06, "epoch": 0.14281633818908884, "total_flos": 136514960208322560, "step": 35500 }, { "loss": 7.076875, "learning_rate": 9.792351898641771e-06, "epoch": 0.14321863773328344, "total_flos": 136911147011973120, "step": 35600 }, { "loss": 7.1040625, "learning_rate": 9.791540773245841e-06, "epoch": 0.14362093727747807, "total_flos": 137311503140782080, "step": 35700 }, { "loss": 7.0725, "learning_rate": 9.790729647849911e-06, "epoch": 0.14402323682167267, "total_flos": 137703727757721600, "step": 35800 }, { "loss": 7.0671875, "learning_rate": 9.78991852245398e-06, "epoch": 0.1444255363658673, "total_flos": 138074027566694400, "step": 35900 }, { "loss": 7.0953125, "learning_rate": 9.78910739705805e-06, "epoch": 0.1448278359100619, "total_flos": 138445782656040960, "step": 36000 }, { "loss": 7.0428125, "learning_rate": 9.78829627166212e-06, "epoch": 0.14523013545425653, "total_flos": 138811196122152960, "step": 36100 }, { "loss": 7.020625, "learning_rate": 9.787485146266188e-06, "epoch": 0.14563243499845116, "total_flos": 139174118615654400, "step": 36200 }, { "loss": 7.1046875, "learning_rate": 9.786674020870258e-06, "epoch": 0.14603473454264576, "total_flos": 139553840568360960, "step": 36300 }, { "loss": 7.0353125, "learning_rate": 9.785862895474326e-06, "epoch": 0.1464370340868404, "total_flos": 139926742886031360, "step": 36400 }, { "loss": 7.106875, "learning_rate": 9.785051770078396e-06, "epoch": 0.146839333631035, "total_flos": 140327438934343680, "step": 36500 }, { "loss": 7.111875, "learning_rate": 9.784240644682466e-06, "epoch": 0.14724163317522962, "total_flos": 140717783371530240, "step": 36600 }, { "loss": 7.14125, "learning_rate": 9.783429519286535e-06, "epoch": 0.14764393271942422, "total_flos": 141113237223751680, "step": 36700 }, { "loss": 7.0703125, "learning_rate": 9.782618393890605e-06, "epoch": 0.14804623226361885, "total_flos": 141485868668067840, "step": 36800 }, { "loss": 7.039375, "learning_rate": 9.781807268494675e-06, "epoch": 0.14844853180781345, "total_flos": 141880292139294720, "step": 36900 }, { "loss": 7.0534375, "learning_rate": 9.780996143098743e-06, "epoch": 0.14885083135200808, "total_flos": 142256152818892800, "step": 37000 }, { "loss": 7.08125, "learning_rate": 9.780185017702813e-06, "epoch": 0.1492531308962027, "total_flos": 142638551637688320, "step": 37100 }, { "loss": 7.076875, "learning_rate": 9.779373892306881e-06, "epoch": 0.1496554304403973, "total_flos": 143006387030261760, "step": 37200 }, { "loss": 7.118125, "learning_rate": 9.778562766910951e-06, "epoch": 0.15005772998459194, "total_flos": 143385540680048640, "step": 37300 }, { "loss": 7.065, "learning_rate": 9.777751641515021e-06, "epoch": 0.15046002952878654, "total_flos": 143766176166420480, "step": 37400 }, { "loss": 7.110625, "learning_rate": 9.77694051611909e-06, "epoch": 0.15086232907298117, "total_flos": 144146413309624320, "step": 37500 }, { "loss": 7.03875, "learning_rate": 9.77612939072316e-06, "epoch": 0.15126462861717577, "total_flos": 144521148005867520, "step": 37600 }, { "loss": 7.045625, "learning_rate": 9.77531826532723e-06, "epoch": 0.1516669281613704, "total_flos": 144901895028326400, "step": 37700 }, { "loss": 7.0728125, "learning_rate": 9.774507139931298e-06, "epoch": 0.152069227705565, "total_flos": 145294964132782080, "step": 37800 }, { "loss": 7.0140625, "learning_rate": 9.773696014535368e-06, "epoch": 0.15247152724975963, "total_flos": 145663659946598400, "step": 37900 }, { "loss": 7.0878125, "learning_rate": 9.772884889139437e-06, "epoch": 0.15287382679395425, "total_flos": 146054211522232320, "step": 38000 }, { "loss": 7.140625, "learning_rate": 9.772073763743507e-06, "epoch": 0.15327612633814885, "total_flos": 146444651561779200, "step": 38100 }, { "loss": 7.06, "learning_rate": 9.771262638347577e-06, "epoch": 0.15367842588234348, "total_flos": 146816226068889600, "step": 38200 }, { "loss": 7.078125, "learning_rate": 9.770451512951645e-06, "epoch": 0.15408072542653808, "total_flos": 147213368896143360, "step": 38300 }, { "loss": 7.04125, "learning_rate": 9.769640387555715e-06, "epoch": 0.1544830249707327, "total_flos": 147586366816174080, "step": 38400 }, { "loss": 7.108125, "learning_rate": 9.768829262159785e-06, "epoch": 0.15488532451492731, "total_flos": 147975133814415360, "step": 38500 }, { "loss": 7.074375, "learning_rate": 9.768018136763854e-06, "epoch": 0.15528762405912194, "total_flos": 148384901464473600, "step": 38600 }, { "loss": 7.0159375, "learning_rate": 9.767207011367924e-06, "epoch": 0.15568992360331657, "total_flos": 148783945716449280, "step": 38700 }, { "loss": 7.0240625, "learning_rate": 9.766395885971994e-06, "epoch": 0.15609222314751117, "total_flos": 149166445448847360, "step": 38800 }, { "loss": 7.0553125, "learning_rate": 9.765584760576062e-06, "epoch": 0.1564945226917058, "total_flos": 149560316550881280, "step": 38900 }, { "loss": 6.981875, "learning_rate": 9.764773635180132e-06, "epoch": 0.1568968222359004, "total_flos": 149948578981109760, "step": 39000 }, { "loss": 7.0646875, "learning_rate": 9.7639625097842e-06, "epoch": 0.15729912178009503, "total_flos": 150337648720158720, "step": 39100 }, { "loss": 7.0615625, "learning_rate": 9.76315138438827e-06, "epoch": 0.15770142132428963, "total_flos": 150723850388398080, "step": 39200 }, { "loss": 7.106875, "learning_rate": 9.76234025899234e-06, "epoch": 0.15810372086848426, "total_flos": 151124870422487040, "step": 39300 }, { "loss": 7.046875, "learning_rate": 9.761529133596409e-06, "epoch": 0.15850602041267886, "total_flos": 151532901296332800, "step": 39400 }, { "loss": 7.0040625, "learning_rate": 9.760718008200479e-06, "epoch": 0.1589083199568735, "total_flos": 151914296290344960, "step": 39500 }, { "loss": 7.0028125, "learning_rate": 9.759906882804549e-06, "epoch": 0.15931061950106812, "total_flos": 152284686390435840, "step": 39600 }, { "loss": 7.046875, "learning_rate": 9.759095757408617e-06, "epoch": 0.15971291904526272, "total_flos": 152670160418488320, "step": 39700 }, { "loss": 6.99375, "learning_rate": 9.758284632012687e-06, "epoch": 0.16011521858945735, "total_flos": 153085945706004480, "step": 39800 }, { "loss": 6.991875, "learning_rate": 9.757473506616756e-06, "epoch": 0.16051751813365195, "total_flos": 153468976562626560, "step": 39900 }, { "loss": 7.0084375, "learning_rate": 9.756662381220826e-06, "epoch": 0.16091981767784658, "total_flos": 153844120224522240, "step": 40000 }, { "loss": 7.0434375, "learning_rate": 9.755851255824896e-06, "epoch": 0.16132211722204118, "total_flos": 154229041883381760, "step": 40100 }, { "loss": 7.0384375, "learning_rate": 9.755040130428964e-06, "epoch": 0.1617244167662358, "total_flos": 154623853075292160, "step": 40200 }, { "loss": 7.014375, "learning_rate": 9.754229005033034e-06, "epoch": 0.1621267163104304, "total_flos": 154998619638988800, "step": 40300 }, { "loss": 7.070625, "learning_rate": 9.753417879637104e-06, "epoch": 0.16252901585462504, "total_flos": 155370449085726720, "step": 40400 }, { "loss": 6.985, "learning_rate": 9.752606754241172e-06, "epoch": 0.16293131539881966, "total_flos": 155741736785756160, "step": 40500 }, { "loss": 6.9825, "learning_rate": 9.751795628845243e-06, "epoch": 0.16333361494301427, "total_flos": 156123854108712960, "step": 40600 }, { "loss": 6.9534375, "learning_rate": 9.750984503449311e-06, "epoch": 0.1637359144872089, "total_flos": 156505434996203520, "step": 40700 }, { "loss": 6.9928125, "learning_rate": 9.750173378053381e-06, "epoch": 0.1641382140314035, "total_flos": 156879373006110720, "step": 40800 }, { "loss": 6.9946875, "learning_rate": 9.749362252657451e-06, "epoch": 0.16454051357559812, "total_flos": 157265271933542400, "step": 40900 }, { "loss": 6.9696875, "learning_rate": 9.74855112726152e-06, "epoch": 0.16494281311979272, "total_flos": 157672782305648640, "step": 41000 }, { "loss": 7.0025, "learning_rate": 9.74774000186559e-06, "epoch": 0.16534511266398735, "total_flos": 158054825271214080, "step": 41100 }, { "loss": 6.973125, "learning_rate": 9.74692887646966e-06, "epoch": 0.16574741220818195, "total_flos": 158445642408960000, "step": 41200 }, { "loss": 7.0371875, "learning_rate": 9.746117751073728e-06, "epoch": 0.16614971175237658, "total_flos": 158826081379368960, "step": 41300 }, { "loss": 6.9796875, "learning_rate": 9.745306625677798e-06, "epoch": 0.1665520112965712, "total_flos": 159206860269281280, "step": 41400 }, { "loss": 6.9453125, "learning_rate": 9.744495500281866e-06, "epoch": 0.1669543108407658, "total_flos": 159586576910745600, "step": 41500 }, { "loss": 6.97625, "learning_rate": 9.743684374885936e-06, "epoch": 0.16735661038496044, "total_flos": 159970585035939840, "step": 41600 }, { "loss": 6.960625, "learning_rate": 9.742873249490006e-06, "epoch": 0.16775890992915504, "total_flos": 160354747187159040, "step": 41700 }, { "loss": 7.0678125, "learning_rate": 9.742062124094075e-06, "epoch": 0.16816120947334967, "total_flos": 160741055080243200, "step": 41800 }, { "loss": 6.9796875, "learning_rate": 9.741250998698145e-06, "epoch": 0.16856350901754427, "total_flos": 161129583072583680, "step": 41900 }, { "loss": 7.013125, "learning_rate": 9.740439873302215e-06, "epoch": 0.1689658085617389, "total_flos": 161525371533066240, "step": 42000 }, { "loss": 6.9915625, "learning_rate": 9.739628747906283e-06, "epoch": 0.16936810810593353, "total_flos": 161902135123845120, "step": 42100 }, { "loss": 6.9425, "learning_rate": 9.738817622510353e-06, "epoch": 0.16977040765012813, "total_flos": 162279520129966080, "step": 42200 }, { "loss": 6.988125, "learning_rate": 9.738006497114421e-06, "epoch": 0.17017270719432276, "total_flos": 162659417353666560, "step": 42300 }, { "loss": 6.98875, "learning_rate": 9.737195371718491e-06, "epoch": 0.17057500673851736, "total_flos": 163049427182592000, "step": 42400 }, { "loss": 6.9559375, "learning_rate": 9.736384246322561e-06, "epoch": 0.170977306282712, "total_flos": 163436792012881920, "step": 42500 }, { "loss": 6.9840625, "learning_rate": 9.73557312092663e-06, "epoch": 0.1713796058269066, "total_flos": 163823753188761600, "step": 42600 }, { "loss": 6.92, "learning_rate": 9.7347619955307e-06, "epoch": 0.17178190537110122, "total_flos": 164196081892270080, "step": 42700 }, { "loss": 6.9028125, "learning_rate": 9.73395087013477e-06, "epoch": 0.17218420491529582, "total_flos": 164567417393479680, "step": 42800 }, { "loss": 6.975625, "learning_rate": 9.733139744738838e-06, "epoch": 0.17258650445949045, "total_flos": 164965951766200320, "step": 42900 }, { "loss": 7.0034375, "learning_rate": 9.732328619342908e-06, "epoch": 0.17298880400368508, "total_flos": 165354469136056320, "step": 43000 }, { "loss": 6.9709375, "learning_rate": 9.731517493946977e-06, "epoch": 0.17339110354787968, "total_flos": 165723409267015680, "step": 43100 }, { "loss": 6.9378125, "learning_rate": 9.730706368551047e-06, "epoch": 0.1737934030920743, "total_flos": 166107794490408960, "step": 43200 }, { "loss": 6.9584375, "learning_rate": 9.729895243155117e-06, "epoch": 0.1741957026362689, "total_flos": 166487197768581120, "step": 43300 }, { "loss": 6.9559375, "learning_rate": 9.729084117759185e-06, "epoch": 0.17459800218046354, "total_flos": 166881732775895040, "step": 43400 }, { "loss": 7.0228125, "learning_rate": 9.728272992363255e-06, "epoch": 0.17500030172465814, "total_flos": 167282726253772800, "step": 43500 }, { "loss": 6.8453125, "learning_rate": 9.727461866967325e-06, "epoch": 0.17540260126885276, "total_flos": 167649738403799040, "step": 43600 }, { "loss": 6.9203125, "learning_rate": 9.726650741571394e-06, "epoch": 0.17580490081304737, "total_flos": 168018120854323200, "step": 43700 }, { "loss": 6.9440625, "learning_rate": 9.725839616175464e-06, "epoch": 0.176207200357242, "total_flos": 168398947545415680, "step": 43800 }, { "loss": 6.82625, "learning_rate": 9.725028490779534e-06, "epoch": 0.17660949990143662, "total_flos": 168787656119992320, "step": 43900 }, { "loss": 6.9365625, "learning_rate": 9.724217365383602e-06, "epoch": 0.17701179944563122, "total_flos": 169170835691397120, "step": 44000 }, { "loss": 6.925, "learning_rate": 9.723406239987672e-06, "epoch": 0.17741409898982585, "total_flos": 169564786462064640, "step": 44100 }, { "loss": 6.916875, "learning_rate": 9.72259511459174e-06, "epoch": 0.17781639853402045, "total_flos": 169922944771276800, "step": 44200 }, { "loss": 6.861875, "learning_rate": 9.72178398919581e-06, "epoch": 0.17821869807821508, "total_flos": 170310479561318400, "step": 44300 }, { "loss": 6.9059375, "learning_rate": 9.72097286379988e-06, "epoch": 0.17862099762240968, "total_flos": 170693037717381120, "step": 44400 }, { "loss": 6.9290625, "learning_rate": 9.720161738403949e-06, "epoch": 0.1790232971666043, "total_flos": 171075144417853440, "step": 44500 }, { "loss": 6.8846875, "learning_rate": 9.719350613008019e-06, "epoch": 0.17942559671079894, "total_flos": 171449369234841600, "step": 44600 }, { "loss": 6.8784375, "learning_rate": 9.718539487612089e-06, "epoch": 0.17982789625499354, "total_flos": 171829054008852480, "step": 44700 }, { "loss": 6.9128125, "learning_rate": 9.717728362216157e-06, "epoch": 0.18023019579918817, "total_flos": 172213800396718080, "step": 44800 }, { "loss": 6.8603125, "learning_rate": 9.716917236820227e-06, "epoch": 0.18063249534338277, "total_flos": 172589990373335040, "step": 44900 }, { "loss": 6.9415625, "learning_rate": 9.716106111424296e-06, "epoch": 0.1810347948875774, "total_flos": 172972569774366720, "step": 45000 }, { "loss": 6.9240625, "learning_rate": 9.715294986028366e-06, "epoch": 0.181437094431772, "total_flos": 173370503976714240, "step": 45100 }, { "loss": 6.9134375, "learning_rate": 9.714483860632436e-06, "epoch": 0.18183939397596663, "total_flos": 173770392716206080, "step": 45200 }, { "loss": 6.9034375, "learning_rate": 9.713672735236504e-06, "epoch": 0.18224169352016123, "total_flos": 174142593949900800, "step": 45300 }, { "loss": 6.89125, "learning_rate": 9.712861609840574e-06, "epoch": 0.18264399306435586, "total_flos": 174534398978703360, "step": 45400 }, { "loss": 6.9025, "learning_rate": 9.712050484444644e-06, "epoch": 0.1830462926085505, "total_flos": 174931122217820160, "step": 45500 }, { "loss": 6.913125, "learning_rate": 9.711239359048712e-06, "epoch": 0.1834485921527451, "total_flos": 175323548661964800, "step": 45600 }, { "loss": 6.909375, "learning_rate": 9.710428233652783e-06, "epoch": 0.18385089169693972, "total_flos": 175697231732244480, "step": 45700 }, { "loss": 6.9184375, "learning_rate": 9.709617108256851e-06, "epoch": 0.18425319124113432, "total_flos": 176094411738193920, "step": 45800 }, { "loss": 6.88, "learning_rate": 9.708805982860921e-06, "epoch": 0.18465549078532895, "total_flos": 176487608312463360, "step": 45900 }, { "loss": 6.8671875, "learning_rate": 9.707994857464991e-06, "epoch": 0.18505779032952355, "total_flos": 176885627494686720, "step": 46000 }, { "loss": 6.8459375, "learning_rate": 9.70718373206906e-06, "epoch": 0.18546008987371818, "total_flos": 177280380262932480, "step": 46100 }, { "loss": 6.8765625, "learning_rate": 9.70637260667313e-06, "epoch": 0.18586238941791278, "total_flos": 177659810097315840, "step": 46200 }, { "loss": 6.9090625, "learning_rate": 9.7055614812772e-06, "epoch": 0.1862646889621074, "total_flos": 178035978828963840, "step": 46300 }, { "loss": 6.8896875, "learning_rate": 9.704750355881268e-06, "epoch": 0.18666698850630203, "total_flos": 178423242745651200, "step": 46400 }, { "loss": 6.909375, "learning_rate": 9.703939230485338e-06, "epoch": 0.18706928805049663, "total_flos": 178821240682905600, "step": 46500 }, { "loss": 6.878125, "learning_rate": 9.703128105089406e-06, "epoch": 0.18747158759469126, "total_flos": 179201605295923200, "step": 46600 }, { "loss": 6.8309375, "learning_rate": 9.702316979693476e-06, "epoch": 0.18787388713888586, "total_flos": 179580424337448960, "step": 46700 }, { "loss": 6.930625, "learning_rate": 9.701505854297546e-06, "epoch": 0.1882761866830805, "total_flos": 179983239571415040, "step": 46800 }, { "loss": 6.8496875, "learning_rate": 9.700694728901615e-06, "epoch": 0.1886784862272751, "total_flos": 180368819824312320, "step": 46900 }, { "loss": 6.805625, "learning_rate": 9.699883603505685e-06, "epoch": 0.18908078577146972, "total_flos": 180761331248332800, "step": 47000 }, { "loss": 6.825, "learning_rate": 9.699072478109755e-06, "epoch": 0.18948308531566432, "total_flos": 181139677589299200, "step": 47100 }, { "loss": 6.85, "learning_rate": 9.698261352713823e-06, "epoch": 0.18988538485985895, "total_flos": 181520451167969280, "step": 47200 }, { "loss": 6.8184375, "learning_rate": 9.697450227317893e-06, "epoch": 0.19028768440405358, "total_flos": 181885891190292480, "step": 47300 }, { "loss": 6.860625, "learning_rate": 9.696639101921961e-06, "epoch": 0.19068998394824818, "total_flos": 182251968561684480, "step": 47400 }, { "loss": 6.85625, "learning_rate": 9.695827976526031e-06, "epoch": 0.1910922834924428, "total_flos": 182627324673269760, "step": 47500 }, { "loss": 6.87125, "learning_rate": 9.695016851130101e-06, "epoch": 0.1914945830366374, "total_flos": 183006430521876480, "step": 47600 }, { "loss": 6.8284375, "learning_rate": 9.69420572573417e-06, "epoch": 0.19189688258083204, "total_flos": 183392557832724480, "step": 47700 }, { "loss": 6.8409375, "learning_rate": 9.69339460033824e-06, "epoch": 0.19229918212502664, "total_flos": 183777081148416000, "step": 47800 }, { "loss": 6.87625, "learning_rate": 9.69258347494231e-06, "epoch": 0.19270148166922127, "total_flos": 184166214622371840, "step": 47900 }, { "loss": 6.8271875, "learning_rate": 9.691772349546378e-06, "epoch": 0.1931037812134159, "total_flos": 184555693327073280, "step": 48000 }, { "loss": 6.864375, "learning_rate": 9.690961224150448e-06, "epoch": 0.1935060807576105, "total_flos": 184936520018165760, "step": 48100 }, { "loss": 6.815625, "learning_rate": 9.690150098754518e-06, "epoch": 0.19390838030180513, "total_flos": 185338055242752000, "step": 48200 }, { "loss": 6.768125, "learning_rate": 9.689338973358587e-06, "epoch": 0.19431067984599973, "total_flos": 185717022999060480, "step": 48300 }, { "loss": 6.8371875, "learning_rate": 9.688527847962657e-06, "epoch": 0.19471297939019436, "total_flos": 186113512543518720, "step": 48400 }, { "loss": 6.813125, "learning_rate": 9.687716722566725e-06, "epoch": 0.19511527893438896, "total_flos": 186493585038213120, "step": 48500 }, { "loss": 6.81625, "learning_rate": 9.686905597170795e-06, "epoch": 0.1955175784785836, "total_flos": 186885018280058880, "step": 48600 }, { "loss": 6.775, "learning_rate": 9.686094471774865e-06, "epoch": 0.1959198780227782, "total_flos": 187273546272399360, "step": 48700 }, { "loss": 6.7646875, "learning_rate": 9.685283346378934e-06, "epoch": 0.19632217756697282, "total_flos": 187647978227834880, "step": 48800 }, { "loss": 6.8228125, "learning_rate": 9.684472220983004e-06, "epoch": 0.19672447711116745, "total_flos": 188029490069176320, "step": 48900 }, { "loss": 6.8340625, "learning_rate": 9.683661095587074e-06, "epoch": 0.19712677665536205, "total_flos": 188408314421944320, "step": 49000 }, { "loss": 6.830625, "learning_rate": 9.682849970191142e-06, "epoch": 0.19752907619955667, "total_flos": 188775910808616960, "step": 49100 }, { "loss": 6.7403125, "learning_rate": 9.682038844795212e-06, "epoch": 0.19793137574375128, "total_flos": 189168698417233920, "step": 49200 }, { "loss": 6.8034375, "learning_rate": 9.68122771939928e-06, "epoch": 0.1983336752879459, "total_flos": 189549689756835840, "step": 49300 }, { "loss": 6.8034375, "learning_rate": 9.68041659400335e-06, "epoch": 0.1987359748321405, "total_flos": 189934940712714240, "step": 49400 }, { "loss": 6.8315625, "learning_rate": 9.67960546860742e-06, "epoch": 0.19913827437633513, "total_flos": 190325630380646400, "step": 49500 }, { "loss": 6.8284375, "learning_rate": 9.678794343211489e-06, "epoch": 0.19954057392052973, "total_flos": 190721153279016960, "step": 49600 }, { "loss": 6.7865625, "learning_rate": 9.677983217815559e-06, "epoch": 0.19994287346472436, "total_flos": 191113149512540160, "step": 49700 }, { "loss": 6.8171875, "learning_rate": 9.677172092419629e-06, "epoch": 0.200345173008919, "total_flos": 191493700019036160, "step": 49800 }, { "loss": 6.7828125, "learning_rate": 9.676360967023697e-06, "epoch": 0.2007474725531136, "total_flos": 191879790151188480, "step": 49900 }, { "loss": 6.7540625, "learning_rate": 9.675549841627767e-06, "epoch": 0.20114977209730822, "total_flos": 192260574352343040, "step": 50000 }, { "loss": 6.761875, "learning_rate": 9.674738716231836e-06, "epoch": 0.20155207164150282, "total_flos": 192644863973376000, "step": 50100 }, { "loss": 6.79625, "learning_rate": 9.673927590835906e-06, "epoch": 0.20195437118569745, "total_flos": 193030741655838720, "step": 50200 }, { "loss": 6.8009375, "learning_rate": 9.673116465439976e-06, "epoch": 0.20235667072989205, "total_flos": 193410256470097920, "step": 50300 }, { "loss": 6.7209375, "learning_rate": 9.672305340044044e-06, "epoch": 0.20275897027408668, "total_flos": 193791423080693760, "step": 50400 }, { "loss": 6.714375, "learning_rate": 9.671494214648114e-06, "epoch": 0.2031612698182813, "total_flos": 194167273137807360, "step": 50500 }, { "loss": 6.7484375, "learning_rate": 9.670683089252184e-06, "epoch": 0.2035635693624759, "total_flos": 194547276586352640, "step": 50600 }, { "loss": 6.7778125, "learning_rate": 9.669871963856252e-06, "epoch": 0.20396586890667054, "total_flos": 194953124539637760, "step": 50700 }, { "loss": 6.7421875, "learning_rate": 9.669060838460323e-06, "epoch": 0.20436816845086514, "total_flos": 195347038131609600, "step": 50800 }, { "loss": 6.6890625, "learning_rate": 9.668249713064391e-06, "epoch": 0.20477046799505977, "total_flos": 195742284845383680, "step": 50900 }, { "loss": 6.7509375, "learning_rate": 9.667438587668463e-06, "epoch": 0.20517276753925437, "total_flos": 196125395370639360, "step": 51000 }, { "loss": 6.749375, "learning_rate": 9.666627462272531e-06, "epoch": 0.205575067083449, "total_flos": 196506147704340480, "step": 51100 }, { "loss": 6.679375, "learning_rate": 9.6658163368766e-06, "epoch": 0.2059773666276436, "total_flos": 196890474504069120, "step": 51200 }, { "loss": 6.7240625, "learning_rate": 9.66500521148067e-06, "epoch": 0.20637966617183823, "total_flos": 197253816585707520, "step": 51300 }, { "loss": 6.7334375, "learning_rate": 9.66419408608474e-06, "epoch": 0.20678196571603286, "total_flos": 197636911177236480, "step": 51400 }, { "loss": 6.704375, "learning_rate": 9.663382960688808e-06, "epoch": 0.20718426526022746, "total_flos": 198019522445721600, "step": 51500 }, { "loss": 6.7140625, "learning_rate": 9.662571835292878e-06, "epoch": 0.20758656480442209, "total_flos": 198397847541719040, "step": 51600 }, { "loss": 6.695625, "learning_rate": 9.661760709896946e-06, "epoch": 0.2079888643486167, "total_flos": 198777367667220480, "step": 51700 }, { "loss": 6.7225, "learning_rate": 9.660949584501018e-06, "epoch": 0.20839116389281132, "total_flos": 199169730376458240, "step": 51800 }, { "loss": 6.7159375, "learning_rate": 9.660138459105086e-06, "epoch": 0.20879346343700592, "total_flos": 199547131316305920, "step": 51900 }, { "loss": 6.7090625, "learning_rate": 9.659327333709155e-06, "epoch": 0.20919576298120054, "total_flos": 199937539488399360, "step": 52000 }, { "loss": 6.665, "learning_rate": 9.658516208313225e-06, "epoch": 0.20959806252539515, "total_flos": 200312523813027840, "step": 52100 }, { "loss": 6.6675, "learning_rate": 9.657705082917295e-06, "epoch": 0.21000036206958977, "total_flos": 200701343923691520, "step": 52200 }, { "loss": 6.6990625, "learning_rate": 9.656893957521363e-06, "epoch": 0.2104026616137844, "total_flos": 201094099664855040, "step": 52300 }, { "loss": 6.651875, "learning_rate": 9.656082832125433e-06, "epoch": 0.210804961157979, "total_flos": 201485070828625920, "step": 52400 }, { "loss": 6.7134375, "learning_rate": 9.655271706729501e-06, "epoch": 0.21120726070217363, "total_flos": 201870555479162880, "step": 52500 }, { "loss": 6.695625, "learning_rate": 9.654460581333573e-06, "epoch": 0.21160956024636823, "total_flos": 202258525791068160, "step": 52600 }, { "loss": 6.6375, "learning_rate": 9.653649455937641e-06, "epoch": 0.21201185979056286, "total_flos": 202637026158059520, "step": 52700 }, { "loss": 6.6825, "learning_rate": 9.65283833054171e-06, "epoch": 0.21241415933475746, "total_flos": 203028135414128640, "step": 52800 }, { "loss": 6.6515625, "learning_rate": 9.652027205145782e-06, "epoch": 0.2128164588789521, "total_flos": 203397436709560320, "step": 52900 }, { "loss": 6.6759375, "learning_rate": 9.65121607974985e-06, "epoch": 0.21321875842314672, "total_flos": 203774428683755520, "step": 53000 }, { "loss": 6.68375, "learning_rate": 9.65040495435392e-06, "epoch": 0.21362105796734132, "total_flos": 204134398126571520, "step": 53100 }, { "loss": 6.631875, "learning_rate": 9.649593828957988e-06, "epoch": 0.21402335751153595, "total_flos": 204514284727787520, "step": 53200 }, { "loss": 6.6165625, "learning_rate": 9.648782703562058e-06, "epoch": 0.21442565705573055, "total_flos": 204882385682472960, "step": 53300 }, { "loss": 6.65375, "learning_rate": 9.647971578166128e-06, "epoch": 0.21482795659992518, "total_flos": 205262697183068160, "step": 53400 }, { "loss": 6.6959375, "learning_rate": 9.647160452770197e-06, "epoch": 0.21523025614411978, "total_flos": 205635774771732480, "step": 53500 }, { "loss": 6.5909375, "learning_rate": 9.646349327374265e-06, "epoch": 0.2156325556883144, "total_flos": 206011035280957440, "step": 53600 }, { "loss": 6.643125, "learning_rate": 9.645538201978337e-06, "epoch": 0.216034855232509, "total_flos": 206388781451550720, "step": 53700 }, { "loss": 6.6953125, "learning_rate": 9.644727076582405e-06, "epoch": 0.21643715477670364, "total_flos": 206775519555256320, "step": 53800 }, { "loss": 6.68375, "learning_rate": 9.643915951186475e-06, "epoch": 0.21683945432089827, "total_flos": 207154896277217280, "step": 53900 }, { "loss": 6.6021875, "learning_rate": 9.643104825790544e-06, "epoch": 0.21724175386509287, "total_flos": 207546425121423360, "step": 54000 }, { "loss": 6.6328125, "learning_rate": 9.642293700394614e-06, "epoch": 0.2176440534092875, "total_flos": 207921712186859520, "step": 54100 }, { "loss": 6.5553125, "learning_rate": 9.641482574998684e-06, "epoch": 0.2180463529534821, "total_flos": 208316719894732800, "step": 54200 }, { "loss": 6.6646875, "learning_rate": 9.640671449602752e-06, "epoch": 0.21844865249767673, "total_flos": 208676285683138560, "step": 54300 }, { "loss": 6.6625, "learning_rate": 9.63986032420682e-06, "epoch": 0.21885095204187133, "total_flos": 209076562143313920, "step": 54400 }, { "loss": 6.641875, "learning_rate": 9.639049198810892e-06, "epoch": 0.21925325158606596, "total_flos": 209457011736207360, "step": 54500 }, { "loss": 6.6603125, "learning_rate": 9.63823807341496e-06, "epoch": 0.21965555113026056, "total_flos": 209858504470855680, "step": 54600 }, { "loss": 6.5734375, "learning_rate": 9.63742694801903e-06, "epoch": 0.22005785067445519, "total_flos": 210259673219727360, "step": 54700 }, { "loss": 6.64, "learning_rate": 9.636615822623099e-06, "epoch": 0.22046015021864981, "total_flos": 210645742106910720, "step": 54800 }, { "loss": 6.5840625, "learning_rate": 9.635804697227169e-06, "epoch": 0.22086244976284441, "total_flos": 211016918270853120, "step": 54900 }, { "loss": 6.5815625, "learning_rate": 9.634993571831239e-06, "epoch": 0.22126474930703904, "total_flos": 211394149250949120, "step": 55000 }, { "loss": 6.5915625, "learning_rate": 9.634182446435307e-06, "epoch": 0.22166704885123364, "total_flos": 211783659823104000, "step": 55100 }, { "loss": 6.58375, "learning_rate": 9.633371321039376e-06, "epoch": 0.22206934839542827, "total_flos": 212154623537356800, "step": 55200 }, { "loss": 6.625, "learning_rate": 9.632560195643447e-06, "epoch": 0.22247164793962287, "total_flos": 212550257971814400, "step": 55300 }, { "loss": 6.616875, "learning_rate": 9.631749070247516e-06, "epoch": 0.2228739474838175, "total_flos": 212946062366023680, "step": 55400 }, { "loss": 6.53, "learning_rate": 9.630937944851586e-06, "epoch": 0.2232762470280121, "total_flos": 213332773913518080, "step": 55500 }, { "loss": 6.553125, "learning_rate": 9.630126819455654e-06, "epoch": 0.22367854657220673, "total_flos": 213720823894056960, "step": 55600 }, { "loss": 6.5628125, "learning_rate": 9.629315694059724e-06, "epoch": 0.22408084611640136, "total_flos": 214103918485585920, "step": 55700 }, { "loss": 6.55875, "learning_rate": 9.628504568663794e-06, "epoch": 0.22448314566059596, "total_flos": 214497316887060480, "step": 55800 }, { "loss": 6.5771875, "learning_rate": 9.627693443267863e-06, "epoch": 0.2248854452047906, "total_flos": 214879614792253440, "step": 55900 }, { "loss": 6.5553125, "learning_rate": 9.626882317871933e-06, "epoch": 0.2252877447489852, "total_flos": 215254880612720640, "step": 56000 }, { "loss": 6.58125, "learning_rate": 9.626071192476003e-06, "epoch": 0.22569004429317982, "total_flos": 215642542872576000, "step": 56100 }, { "loss": 6.5334375, "learning_rate": 9.625260067080071e-06, "epoch": 0.22609234383737442, "total_flos": 216023236782612480, "step": 56200 }, { "loss": 6.6175, "learning_rate": 9.624448941684141e-06, "epoch": 0.22649464338156905, "total_flos": 216405311615631360, "step": 56300 }, { "loss": 6.51625, "learning_rate": 9.62363781628821e-06, "epoch": 0.22689694292576368, "total_flos": 216789919911198720, "step": 56400 }, { "loss": 6.5021875, "learning_rate": 9.62282669089228e-06, "epoch": 0.22729924246995828, "total_flos": 217176334029127680, "step": 56500 }, { "loss": 6.5309375, "learning_rate": 9.62201556549635e-06, "epoch": 0.2277015420141529, "total_flos": 217572058754703360, "step": 56600 }, { "loss": 6.543125, "learning_rate": 9.621204440100418e-06, "epoch": 0.2281038415583475, "total_flos": 217950155467284480, "step": 56700 }, { "loss": 6.5315625, "learning_rate": 9.620393314704488e-06, "epoch": 0.22850614110254214, "total_flos": 218328262802350080, "step": 56800 }, { "loss": 6.501875, "learning_rate": 9.619582189308558e-06, "epoch": 0.22890844064673674, "total_flos": 218721045099724800, "step": 56900 }, { "loss": 6.48375, "learning_rate": 9.618771063912626e-06, "epoch": 0.22931074019093137, "total_flos": 219111920661135360, "step": 57000 }, { "loss": 6.5340625, "learning_rate": 9.617959938516696e-06, "epoch": 0.22971303973512597, "total_flos": 219487993790423040, "step": 57100 }, { "loss": 6.56125, "learning_rate": 9.617148813120765e-06, "epoch": 0.2301153392793206, "total_flos": 219879697905623040, "step": 57200 }, { "loss": 6.473125, "learning_rate": 9.616337687724835e-06, "epoch": 0.23051763882351523, "total_flos": 220241829024030720, "step": 57300 }, { "loss": 6.5040625, "learning_rate": 9.615526562328905e-06, "epoch": 0.23091993836770983, "total_flos": 220615235909713920, "step": 57400 }, { "loss": 6.530625, "learning_rate": 9.614715436932973e-06, "epoch": 0.23132223791190445, "total_flos": 221004688058204160, "step": 57500 }, { "loss": 6.5290625, "learning_rate": 9.613904311537043e-06, "epoch": 0.23172453745609906, "total_flos": 221396806450298880, "step": 57600 }, { "loss": 6.5275, "learning_rate": 9.613093186141113e-06, "epoch": 0.23212683700029368, "total_flos": 221782822225059840, "step": 57700 }, { "loss": 6.4825, "learning_rate": 9.612282060745181e-06, "epoch": 0.23252913654448829, "total_flos": 222184474296975360, "step": 57800 }, { "loss": 6.5321875, "learning_rate": 9.611470935349252e-06, "epoch": 0.2329314360886829, "total_flos": 222560207506759680, "step": 57900 }, { "loss": 6.4990625, "learning_rate": 9.610659809953322e-06, "epoch": 0.23333373563287751, "total_flos": 222956049079664640, "step": 58000 }, { "loss": 6.458125, "learning_rate": 9.60984868455739e-06, "epoch": 0.23373603517707214, "total_flos": 223348130293063680, "step": 58100 }, { "loss": 6.4415625, "learning_rate": 9.60903755916146e-06, "epoch": 0.23413833472126677, "total_flos": 223726795308564480, "step": 58200 }, { "loss": 6.5328125, "learning_rate": 9.608226433765528e-06, "epoch": 0.23454063426546137, "total_flos": 224125903295447040, "step": 58300 }, { "loss": 6.4553125, "learning_rate": 9.607415308369598e-06, "epoch": 0.234942933809656, "total_flos": 224507547917844480, "step": 58400 }, { "loss": 6.4625, "learning_rate": 9.606604182973668e-06, "epoch": 0.2353452333538506, "total_flos": 224894758722109440, "step": 58500 }, { "loss": 6.515, "learning_rate": 9.605793057577737e-06, "epoch": 0.23574753289804523, "total_flos": 225281066615193600, "step": 58600 }, { "loss": 6.4815625, "learning_rate": 9.604981932181807e-06, "epoch": 0.23614983244223983, "total_flos": 225647128052858880, "step": 58700 }, { "loss": 6.465625, "learning_rate": 9.604170806785877e-06, "epoch": 0.23655213198643446, "total_flos": 226036489910231040, "step": 58800 }, { "loss": 6.4559375, "learning_rate": 9.603359681389945e-06, "epoch": 0.2369544315306291, "total_flos": 226423998144061440, "step": 58900 }, { "loss": 6.35875, "learning_rate": 9.602548555994015e-06, "epoch": 0.2373567310748237, "total_flos": 226792385905827840, "step": 59000 }, { "loss": 6.4409375, "learning_rate": 9.601737430598084e-06, "epoch": 0.23775903061901832, "total_flos": 227180993566801920, "step": 59100 }, { "loss": 6.3896875, "learning_rate": 9.600926305202154e-06, "epoch": 0.23816133016321292, "total_flos": 227563450809262080, "step": 59200 }, { "loss": 6.4034375, "learning_rate": 9.600115179806224e-06, "epoch": 0.23856362970740755, "total_flos": 227944134096814080, "step": 59300 }, { "loss": 6.3978125, "learning_rate": 9.599304054410292e-06, "epoch": 0.23896592925160215, "total_flos": 228326665696665600, "step": 59400 }, { "loss": 6.419375, "learning_rate": 9.598492929014362e-06, "epoch": 0.23936822879579678, "total_flos": 228709075137945600, "step": 59500 }, { "loss": 6.406875, "learning_rate": 9.597681803618432e-06, "epoch": 0.23977052833999138, "total_flos": 229094586344693760, "step": 59600 }, { "loss": 6.418125, "learning_rate": 9.5968706782225e-06, "epoch": 0.240172827884186, "total_flos": 229479460202373120, "step": 59700 }, { "loss": 6.4375, "learning_rate": 9.59605955282657e-06, "epoch": 0.24057512742838064, "total_flos": 229863749823406080, "step": 59800 }, { "loss": 6.4871875, "learning_rate": 9.595248427430639e-06, "epoch": 0.24097742697257524, "total_flos": 230246690388910080, "step": 59900 }, { "loss": 6.5125, "learning_rate": 9.594437302034709e-06, "epoch": 0.24137972651676987, "total_flos": 230631033122365440, "step": 60000 }, { "loss": 6.4303125, "learning_rate": 9.593626176638779e-06, "epoch": 0.24178202606096447, "total_flos": 231009814985195520, "step": 60100 }, { "loss": 6.376875, "learning_rate": 9.592815051242847e-06, "epoch": 0.2421843256051591, "total_flos": 231383551167897600, "step": 60200 }, { "loss": 6.3459375, "learning_rate": 9.592003925846917e-06, "epoch": 0.2425866251493537, "total_flos": 231751099753390080, "step": 60300 }, { "loss": 6.371875, "learning_rate": 9.591192800450987e-06, "epoch": 0.24298892469354832, "total_flos": 232112009286082560, "step": 60400 }, { "loss": 6.4053125, "learning_rate": 9.590381675055056e-06, "epoch": 0.24339122423774293, "total_flos": 232506326532464640, "step": 60500 }, { "loss": 6.4328125, "learning_rate": 9.589570549659126e-06, "epoch": 0.24379352378193755, "total_flos": 232880800977838080, "step": 60600 }, { "loss": 6.331875, "learning_rate": 9.588759424263194e-06, "epoch": 0.24419582332613218, "total_flos": 233260746002718720, "step": 60700 }, { "loss": 6.3478125, "learning_rate": 9.587948298867264e-06, "epoch": 0.24459812287032678, "total_flos": 233652073019719680, "step": 60800 }, { "loss": 6.394375, "learning_rate": 9.587137173471334e-06, "epoch": 0.2450004224145214, "total_flos": 234037833854853120, "step": 60900 }, { "loss": 6.4, "learning_rate": 9.586326048075403e-06, "epoch": 0.245402721958716, "total_flos": 234424317018931200, "step": 61000 }, { "loss": 6.438125, "learning_rate": 9.585514922679473e-06, "epoch": 0.24580502150291064, "total_flos": 234819861162270720, "step": 61100 }, { "loss": 6.4084375, "learning_rate": 9.584703797283543e-06, "epoch": 0.24620732104710524, "total_flos": 235212712505794560, "step": 61200 }, { "loss": 6.3884375, "learning_rate": 9.583892671887611e-06, "epoch": 0.24660962059129987, "total_flos": 235589385805455360, "step": 61300 }, { "loss": 6.35625, "learning_rate": 9.583081546491681e-06, "epoch": 0.24701192013549447, "total_flos": 235970010669342720, "step": 61400 }, { "loss": 6.383125, "learning_rate": 9.58227042109575e-06, "epoch": 0.2474142196796891, "total_flos": 236346992021053440, "step": 61500 }, { "loss": 6.31125, "learning_rate": 9.58145929569982e-06, "epoch": 0.24781651922388373, "total_flos": 236715937463255040, "step": 61600 }, { "loss": 6.359375, "learning_rate": 9.58064817030389e-06, "epoch": 0.24821881876807833, "total_flos": 237101852324413440, "step": 61700 }, { "loss": 6.400625, "learning_rate": 9.579837044907958e-06, "epoch": 0.24862111831227296, "total_flos": 237494273457315840, "step": 61800 }, { "loss": 6.3378125, "learning_rate": 9.579025919512028e-06, "epoch": 0.24902341785646756, "total_flos": 237881808247357440, "step": 61900 }, { "loss": 6.3815625, "learning_rate": 9.578214794116098e-06, "epoch": 0.2494257174006622, "total_flos": 238268636642181120, "step": 62000 }, { "loss": 6.381875, "learning_rate": 9.577403668720166e-06, "epoch": 0.2498280169448568, "total_flos": 238648857851658240, "step": 62100 }, { "loss": 6.3959375, "learning_rate": 9.576592543324236e-06, "epoch": 0.2502303164890514, "total_flos": 239019417911500800, "step": 62200 }, { "loss": 6.375, "learning_rate": 9.575781417928306e-06, "epoch": 0.25063261603324605, "total_flos": 239416173018071040, "step": 62300 }, { "loss": 6.3378125, "learning_rate": 9.574970292532375e-06, "epoch": 0.2510349155774407, "total_flos": 239793393375682560, "step": 62400 }, { "loss": 6.3225, "learning_rate": 9.574159167136445e-06, "epoch": 0.25143721512163525, "total_flos": 240178161008517120, "step": 62500 }, { "loss": 6.2959375, "learning_rate": 9.573348041740513e-06, "epoch": 0.2518395146658299, "total_flos": 240546463790407680, "step": 62600 }, { "loss": 6.3715625, "learning_rate": 9.572536916344583e-06, "epoch": 0.2522418142100245, "total_flos": 240938114793185280, "step": 62700 }, { "loss": 6.2559375, "learning_rate": 9.571725790948653e-06, "epoch": 0.25264411375421914, "total_flos": 241327694411489280, "step": 62800 }, { "loss": 6.295625, "learning_rate": 9.570914665552721e-06, "epoch": 0.2530464132984137, "total_flos": 241723737811599360, "step": 62900 }, { "loss": 6.3271875, "learning_rate": 9.570103540156792e-06, "epoch": 0.25344871284260834, "total_flos": 242101765478031360, "step": 63000 }, { "loss": 6.3178125, "learning_rate": 9.569292414760862e-06, "epoch": 0.25385101238680297, "total_flos": 242488530137948160, "step": 63100 }, { "loss": 6.3053125, "learning_rate": 9.56848128936493e-06, "epoch": 0.2542533119309976, "total_flos": 242878529344389120, "step": 63200 }, { "loss": 6.389375, "learning_rate": 9.567670163969e-06, "epoch": 0.2546556114751922, "total_flos": 243273791991889920, "step": 63300 }, { "loss": 6.2815625, "learning_rate": 9.566859038573068e-06, "epoch": 0.2550579110193868, "total_flos": 243642041661358080, "step": 63400 }, { "loss": 6.326875, "learning_rate": 9.566047913177138e-06, "epoch": 0.2554602105635814, "total_flos": 244030548408729600, "step": 63500 }, { "loss": 6.2778125, "learning_rate": 9.565236787781208e-06, "epoch": 0.25586251010777605, "total_flos": 244414174124482560, "step": 63600 }, { "loss": 6.285, "learning_rate": 9.564425662385277e-06, "epoch": 0.2562648096519707, "total_flos": 244820388553482240, "step": 63700 }, { "loss": 6.2803125, "learning_rate": 9.563614536989347e-06, "epoch": 0.25666710919616526, "total_flos": 245201183377121280, "step": 63800 }, { "loss": 6.285, "learning_rate": 9.562803411593417e-06, "epoch": 0.2570694087403599, "total_flos": 245590858597785600, "step": 63900 }, { "loss": 6.31125, "learning_rate": 9.561992286197485e-06, "epoch": 0.2574717082845545, "total_flos": 245982339640811520, "step": 64000 }, { "loss": 6.2390625, "learning_rate": 9.561181160801555e-06, "epoch": 0.25787400782874914, "total_flos": 246365466099793920, "step": 64100 }, { "loss": 6.26625, "learning_rate": 9.560370035405624e-06, "epoch": 0.25827630737294377, "total_flos": 246756787805552640, "step": 64200 }, { "loss": 6.26625, "learning_rate": 9.559558910009694e-06, "epoch": 0.25867860691713834, "total_flos": 247139202558074880, "step": 64300 }, { "loss": 6.2615625, "learning_rate": 9.558747784613764e-06, "epoch": 0.25908090646133297, "total_flos": 247515998016307200, "step": 64400 }, { "loss": 6.2375, "learning_rate": 9.557936659217832e-06, "epoch": 0.2594832060055276, "total_flos": 247908360725544960, "step": 64500 }, { "loss": 6.2875, "learning_rate": 9.557125533821902e-06, "epoch": 0.25988550554972223, "total_flos": 248294987293163520, "step": 64600 }, { "loss": 6.2796875, "learning_rate": 9.556314408425972e-06, "epoch": 0.2602878050939168, "total_flos": 248651541607219200, "step": 64700 }, { "loss": 6.2475, "learning_rate": 9.55550328303004e-06, "epoch": 0.26069010463811143, "total_flos": 249036102101606400, "step": 64800 }, { "loss": 6.293125, "learning_rate": 9.55469215763411e-06, "epoch": 0.26109240418230606, "total_flos": 249430376858050560, "step": 64900 }, { "loss": 6.3028125, "learning_rate": 9.553881032238179e-06, "epoch": 0.2614947037265007, "total_flos": 249812865967964160, "step": 65000 }, { "loss": 6.2790625, "learning_rate": 9.553069906842249e-06, "epoch": 0.2618970032706953, "total_flos": 250184743215882240, "step": 65100 }, { "loss": 6.230625, "learning_rate": 9.552258781446319e-06, "epoch": 0.2622993028148899, "total_flos": 250562574366351360, "step": 65200 }, { "loss": 6.24125, "learning_rate": 9.551447656050387e-06, "epoch": 0.2627016023590845, "total_flos": 250935811292282880, "step": 65300 }, { "loss": 6.2771875, "learning_rate": 9.550636530654457e-06, "epoch": 0.26310390190327915, "total_flos": 251319814106234880, "step": 65400 }, { "loss": 6.2084375, "learning_rate": 9.549825405258527e-06, "epoch": 0.2635062014474738, "total_flos": 251687283023093760, "step": 65500 }, { "loss": 6.25, "learning_rate": 9.549014279862596e-06, "epoch": 0.26390850099166835, "total_flos": 252079305812828160, "step": 65600 }, { "loss": 6.22875, "learning_rate": 9.548203154466666e-06, "epoch": 0.264310800535863, "total_flos": 252463117422059520, "step": 65700 }, { "loss": 6.23125, "learning_rate": 9.547392029070734e-06, "epoch": 0.2647131000800576, "total_flos": 252847948789800960, "step": 65800 }, { "loss": 6.2415625, "learning_rate": 9.546580903674804e-06, "epoch": 0.26511539962425223, "total_flos": 253243716005314560, "step": 65900 }, { "loss": 6.205, "learning_rate": 9.545769778278874e-06, "epoch": 0.26551769916844686, "total_flos": 253622391643299840, "step": 66000 }, { "loss": 6.205625, "learning_rate": 9.544958652882943e-06, "epoch": 0.26591999871264144, "total_flos": 253993907726745600, "step": 66100 }, { "loss": 6.161875, "learning_rate": 9.544147527487013e-06, "epoch": 0.26632229825683607, "total_flos": 254360919876771840, "step": 66200 }, { "loss": 6.1334375, "learning_rate": 9.543336402091083e-06, "epoch": 0.2667245978010307, "total_flos": 254733253891522560, "step": 66300 }, { "loss": 6.2140625, "learning_rate": 9.542525276695151e-06, "epoch": 0.2671268973452253, "total_flos": 255118738542059520, "step": 66400 }, { "loss": 6.2096875, "learning_rate": 9.541714151299221e-06, "epoch": 0.26752919688941995, "total_flos": 255515169662853120, "step": 66500 }, { "loss": 6.2596875, "learning_rate": 9.54090302590329e-06, "epoch": 0.2679314964336145, "total_flos": 255895789215498240, "step": 66600 }, { "loss": 6.2075, "learning_rate": 9.54009190050736e-06, "epoch": 0.26833379597780915, "total_flos": 256271256863170560, "step": 66700 }, { "loss": 6.22625, "learning_rate": 9.53928077511143e-06, "epoch": 0.2687360955220038, "total_flos": 256655960761098240, "step": 66800 }, { "loss": 6.210625, "learning_rate": 9.538469649715498e-06, "epoch": 0.2691383950661984, "total_flos": 257048785548410880, "step": 66900 }, { "loss": 6.1825, "learning_rate": 9.537658524319568e-06, "epoch": 0.269540694610393, "total_flos": 257452636474613760, "step": 67000 }, { "loss": 6.1803125, "learning_rate": 9.536847398923638e-06, "epoch": 0.2699429941545876, "total_flos": 257845084163727360, "step": 67100 }, { "loss": 6.199375, "learning_rate": 9.536036273527706e-06, "epoch": 0.27034529369878224, "total_flos": 258230855621345280, "step": 67200 }, { "loss": 6.190625, "learning_rate": 9.535225148131776e-06, "epoch": 0.27074759324297687, "total_flos": 258603587979264000, "step": 67300 }, { "loss": 6.156875, "learning_rate": 9.534414022735846e-06, "epoch": 0.2711498927871715, "total_flos": 258990618201292800, "step": 67400 }, { "loss": 6.179375, "learning_rate": 9.533602897339915e-06, "epoch": 0.27155219233136607, "total_flos": 259358841314549760, "step": 67500 }, { "loss": 6.1475, "learning_rate": 9.532791771943985e-06, "epoch": 0.2719544918755607, "total_flos": 259743805463347200, "step": 67600 }, { "loss": 6.1803125, "learning_rate": 9.531980646548053e-06, "epoch": 0.27235679141975533, "total_flos": 260127133749534720, "step": 67700 }, { "loss": 6.12375, "learning_rate": 9.531169521152123e-06, "epoch": 0.27275909096394996, "total_flos": 260508518121062400, "step": 67800 }, { "loss": 6.1903125, "learning_rate": 9.530358395756193e-06, "epoch": 0.27316139050814453, "total_flos": 260890970052280320, "step": 67900 }, { "loss": 6.11875, "learning_rate": 9.529547270360261e-06, "epoch": 0.27356369005233916, "total_flos": 261266235872747520, "step": 68000 }, { "loss": 6.1171875, "learning_rate": 9.528736144964332e-06, "epoch": 0.2739659895965338, "total_flos": 261641140528742400, "step": 68100 }, { "loss": 6.153125, "learning_rate": 9.527925019568402e-06, "epoch": 0.2743682891407284, "total_flos": 262043642399416320, "step": 68200 }, { "loss": 6.124375, "learning_rate": 9.52711389417247e-06, "epoch": 0.27477058868492304, "total_flos": 262440854272819200, "step": 68300 }, { "loss": 6.1534375, "learning_rate": 9.52630276877654e-06, "epoch": 0.2751728882291176, "total_flos": 262833525034106880, "step": 68400 }, { "loss": 6.12, "learning_rate": 9.525491643380608e-06, "epoch": 0.27557518777331225, "total_flos": 263205779380224000, "step": 68500 }, { "loss": 6.176875, "learning_rate": 9.524680517984678e-06, "epoch": 0.2759774873175069, "total_flos": 263610421681520640, "step": 68600 }, { "loss": 6.2346875, "learning_rate": 9.523869392588748e-06, "epoch": 0.2763797868617015, "total_flos": 264002874681876480, "step": 68700 }, { "loss": 6.1278125, "learning_rate": 9.523058267192817e-06, "epoch": 0.2767820864058961, "total_flos": 264370959702835200, "step": 68800 }, { "loss": 6.160625, "learning_rate": 9.522247141796887e-06, "epoch": 0.2771843859500907, "total_flos": 264777588408729600, "step": 68900 }, { "loss": 6.119375, "learning_rate": 9.521436016400957e-06, "epoch": 0.27758668549428533, "total_flos": 265156986375659520, "step": 69000 }, { "loss": 6.15625, "learning_rate": 9.520624891005025e-06, "epoch": 0.27798898503847996, "total_flos": 265556179342417920, "step": 69100 }, { "loss": 6.0778125, "learning_rate": 9.519813765609095e-06, "epoch": 0.2783912845826746, "total_flos": 265933681195868160, "step": 69200 }, { "loss": 6.0909375, "learning_rate": 9.519002640213164e-06, "epoch": 0.27879358412686917, "total_flos": 266326750300323840, "step": 69300 }, { "loss": 6.0390625, "learning_rate": 9.518191514817234e-06, "epoch": 0.2791958836710638, "total_flos": 266707741639925760, "step": 69400 }, { "loss": 6.0890625, "learning_rate": 9.517380389421304e-06, "epoch": 0.2795981832152584, "total_flos": 267091845367480320, "step": 69500 }, { "loss": 6.1253125, "learning_rate": 9.516569264025372e-06, "epoch": 0.28000048275945305, "total_flos": 267482928067338240, "step": 69600 }, { "loss": 6.0828125, "learning_rate": 9.515758138629442e-06, "epoch": 0.2804027823036476, "total_flos": 267856361509232640, "step": 69700 }, { "loss": 6.1553125, "learning_rate": 9.514947013233512e-06, "epoch": 0.28080508184784225, "total_flos": 268266846176993280, "step": 69800 }, { "loss": 6.0728125, "learning_rate": 9.51413588783758e-06, "epoch": 0.2812073813920369, "total_flos": 268646844314296320, "step": 69900 }, { "loss": 6.0690625, "learning_rate": 9.51332476244165e-06, "epoch": 0.2816096809362315, "total_flos": 269035738782351360, "step": 70000 }, { "loss": 6.1084375, "learning_rate": 9.512513637045719e-06, "epoch": 0.28201198048042614, "total_flos": 269428712284446720, "step": 70100 }, { "loss": 6.1065625, "learning_rate": 9.511702511649789e-06, "epoch": 0.2824142800246207, "total_flos": 269813548963430400, "step": 70200 }, { "loss": 6.0946875, "learning_rate": 9.510891386253859e-06, "epoch": 0.28281657956881534, "total_flos": 270183556654080000, "step": 70300 }, { "loss": 6.13375, "learning_rate": 9.510080260857927e-06, "epoch": 0.28321887911300997, "total_flos": 270560081238958080, "step": 70400 }, { "loss": 6.0784375, "learning_rate": 9.509269135461997e-06, "epoch": 0.2836211786572046, "total_flos": 270950016710492160, "step": 70500 }, { "loss": 6.0209375, "learning_rate": 9.508458010066067e-06, "epoch": 0.28402347820139917, "total_flos": 271322988074311680, "step": 70600 }, { "loss": 6.0859375, "learning_rate": 9.507646884670136e-06, "epoch": 0.2844257777455938, "total_flos": 271699321454469120, "step": 70700 }, { "loss": 6.0525, "learning_rate": 9.506835759274206e-06, "epoch": 0.28482807728978843, "total_flos": 272091492958986240, "step": 70800 }, { "loss": 6.075625, "learning_rate": 9.506024633878274e-06, "epoch": 0.28523037683398306, "total_flos": 272463710126407680, "step": 70900 }, { "loss": 6.01375, "learning_rate": 9.505213508482344e-06, "epoch": 0.2856326763781777, "total_flos": 272845774336942080, "step": 71000 }, { "loss": 6.078125, "learning_rate": 9.504402383086414e-06, "epoch": 0.28603497592237226, "total_flos": 273222511371509760, "step": 71100 }, { "loss": 6.0725, "learning_rate": 9.503591257690483e-06, "epoch": 0.2864372754665669, "total_flos": 273588562186690560, "step": 71200 }, { "loss": 6.0103125, "learning_rate": 9.502780132294553e-06, "epoch": 0.2868395750107615, "total_flos": 273971853294182400, "step": 71300 }, { "loss": 6.0621875, "learning_rate": 9.501969006898623e-06, "epoch": 0.28724187455495614, "total_flos": 274350815739248640, "step": 71400 }, { "loss": 6.079375, "learning_rate": 9.501157881502691e-06, "epoch": 0.2876441740991508, "total_flos": 274754050561351680, "step": 71500 }, { "loss": 6.02125, "learning_rate": 9.500346756106761e-06, "epoch": 0.28804647364334535, "total_flos": 275141919959654400, "step": 71600 }, { "loss": 6.0615625, "learning_rate": 9.499535630710831e-06, "epoch": 0.28844877318754, "total_flos": 275519007536209920, "step": 71700 }, { "loss": 6.0178125, "learning_rate": 9.4987245053149e-06, "epoch": 0.2888510727317346, "total_flos": 275893285465620480, "step": 71800 }, { "loss": 6.0884375, "learning_rate": 9.49791337991897e-06, "epoch": 0.28925337227592923, "total_flos": 276274898220564480, "step": 71900 }, { "loss": 6.0190625, "learning_rate": 9.497102254523038e-06, "epoch": 0.2896556718201238, "total_flos": 276674425795584000, "step": 72000 }, { "loss": 5.9940625, "learning_rate": 9.49629112912711e-06, "epoch": 0.29005797136431843, "total_flos": 277065407581839360, "step": 72100 }, { "loss": 6.12125, "learning_rate": 9.495480003731178e-06, "epoch": 0.29046027090851306, "total_flos": 277450488577966080, "step": 72200 }, { "loss": 6.0253125, "learning_rate": 9.494668878335246e-06, "epoch": 0.2908625704527077, "total_flos": 277836132565770240, "step": 72300 }, { "loss": 6.0153125, "learning_rate": 9.493857752939316e-06, "epoch": 0.2912648699969023, "total_flos": 278206134945177600, "step": 72400 }, { "loss": 6.0328125, "learning_rate": 9.493046627543386e-06, "epoch": 0.2916671695410969, "total_flos": 278595730497208320, "step": 72500 }, { "loss": 6.0028125, "learning_rate": 9.492235502147455e-06, "epoch": 0.2920694690852915, "total_flos": 278987344321290240, "step": 72600 }, { "loss": 5.9884375, "learning_rate": 9.491424376751525e-06, "epoch": 0.29247176862948615, "total_flos": 279371421492633600, "step": 72700 }, { "loss": 6.0503125, "learning_rate": 9.490613251355593e-06, "epoch": 0.2928740681736808, "total_flos": 279752487189626880, "step": 72800 }, { "loss": 6.02625, "learning_rate": 9.489802125959665e-06, "epoch": 0.29327636771787535, "total_flos": 280134126500782080, "step": 72900 }, { "loss": 6.0365625, "learning_rate": 9.488991000563733e-06, "epoch": 0.29367866726207, "total_flos": 280520965518090240, "step": 73000 }, { "loss": 6.0134375, "learning_rate": 9.488179875167801e-06, "epoch": 0.2940809668062646, "total_flos": 280919876989009920, "step": 73100 }, { "loss": 6.0753125, "learning_rate": 9.487368749771872e-06, "epoch": 0.29448326635045924, "total_flos": 281313679044894720, "step": 73200 }, { "loss": 6.0171875, "learning_rate": 9.486557624375942e-06, "epoch": 0.29488556589465387, "total_flos": 281704437758976000, "step": 73300 }, { "loss": 6.066875, "learning_rate": 9.485746498980012e-06, "epoch": 0.29528786543884844, "total_flos": 282090793453240320, "step": 73400 }, { "loss": 5.98375, "learning_rate": 9.48493537358408e-06, "epoch": 0.29569016498304307, "total_flos": 282458453574819840, "step": 73500 }, { "loss": 6.0075, "learning_rate": 9.484124248188148e-06, "epoch": 0.2960924645272377, "total_flos": 282849695611944960, "step": 73600 }, { "loss": 5.9675, "learning_rate": 9.48331312279222e-06, "epoch": 0.2964947640714323, "total_flos": 283226315799183360, "step": 73700 }, { "loss": 5.963125, "learning_rate": 9.482501997396288e-06, "epoch": 0.2968970636156269, "total_flos": 283607593945866240, "step": 73800 }, { "loss": 5.96625, "learning_rate": 9.481690872000357e-06, "epoch": 0.29729936315982153, "total_flos": 283995091557212160, "step": 73900 }, { "loss": 5.95875, "learning_rate": 9.480879746604427e-06, "epoch": 0.29770166270401616, "total_flos": 284375036582092800, "step": 74000 }, { "loss": 5.94, "learning_rate": 9.480068621208497e-06, "epoch": 0.2981039622482108, "total_flos": 284756898965422080, "step": 74100 }, { "loss": 5.97375, "learning_rate": 9.479257495812567e-06, "epoch": 0.2985062617924054, "total_flos": 285135558669680640, "step": 74200 }, { "loss": 5.991875, "learning_rate": 9.478446370416635e-06, "epoch": 0.2989085613366, "total_flos": 285524564673822720, "step": 74300 }, { "loss": 5.925, "learning_rate": 9.477635245020704e-06, "epoch": 0.2993108608807946, "total_flos": 285898826669506560, "step": 74400 }, { "loss": 5.99125, "learning_rate": 9.476824119624775e-06, "epoch": 0.29971316042498924, "total_flos": 286303617685585920, "step": 74500 }, { "loss": 5.973125, "learning_rate": 9.476012994228844e-06, "epoch": 0.3001154599691839, "total_flos": 286684954255933440, "step": 74600 }, { "loss": 5.949375, "learning_rate": 9.475201868832912e-06, "epoch": 0.30051775951337845, "total_flos": 287061468218327040, "step": 74700 }, { "loss": 5.938125, "learning_rate": 9.474390743436982e-06, "epoch": 0.3009200590575731, "total_flos": 287427827085557760, "step": 74800 }, { "loss": 5.9675, "learning_rate": 9.473579618041052e-06, "epoch": 0.3013223586017677, "total_flos": 287808218254786560, "step": 74900 }, { "loss": 5.9075, "learning_rate": 9.472768492645122e-06, "epoch": 0.30172465814596233, "total_flos": 288194021579857920, "step": 75000 }, { "loss": 5.966875, "learning_rate": 9.47195736724919e-06, "epoch": 0.30212695769015696, "total_flos": 288569807902064640, "step": 75100 }, { "loss": 5.913125, "learning_rate": 9.471146241853259e-06, "epoch": 0.30252925723435153, "total_flos": 288955462512353280, "step": 75200 }, { "loss": 5.978125, "learning_rate": 9.47033511645733e-06, "epoch": 0.30293155677854616, "total_flos": 289342853898854400, "step": 75300 }, { "loss": 5.94875, "learning_rate": 9.469523991061399e-06, "epoch": 0.3033338563227408, "total_flos": 289735529971384320, "step": 75400 }, { "loss": 5.88625, "learning_rate": 9.468712865665469e-06, "epoch": 0.3037361558669354, "total_flos": 290110259356385280, "step": 75500 }, { "loss": 5.911875, "learning_rate": 9.467901740269537e-06, "epoch": 0.30413845541113, "total_flos": 290495425332387840, "step": 75600 }, { "loss": 6.000625, "learning_rate": 9.467090614873607e-06, "epoch": 0.3045407549553246, "total_flos": 290887718995476480, "step": 75700 }, { "loss": 5.9425, "learning_rate": 9.466279489477677e-06, "epoch": 0.30494305449951925, "total_flos": 291273702902784000, "step": 75800 }, { "loss": 5.928125, "learning_rate": 9.465468364081746e-06, "epoch": 0.3053453540437139, "total_flos": 291653610748968960, "step": 75900 }, { "loss": 5.873125, "learning_rate": 9.464657238685814e-06, "epoch": 0.3057476535879085, "total_flos": 292043200989757440, "step": 76000 }, { "loss": 5.905, "learning_rate": 9.463846113289886e-06, "epoch": 0.3061499531321031, "total_flos": 292431389062594560, "step": 76100 }, { "loss": 5.915625, "learning_rate": 9.463034987893954e-06, "epoch": 0.3065522526762977, "total_flos": 292813676345303040, "step": 76200 }, { "loss": 5.959375, "learning_rate": 9.462223862498024e-06, "epoch": 0.30695455222049234, "total_flos": 293207430600007680, "step": 76300 }, { "loss": 5.960625, "learning_rate": 9.461412737102094e-06, "epoch": 0.30735685176468697, "total_flos": 293592564708556800, "step": 76400 }, { "loss": 5.9125, "learning_rate": 9.460601611706163e-06, "epoch": 0.30775915130888154, "total_flos": 293982038102016000, "step": 76500 }, { "loss": 5.92875, "learning_rate": 9.459790486310233e-06, "epoch": 0.30816145085307617, "total_flos": 294364946800066560, "step": 76600 }, { "loss": 5.909375, "learning_rate": 9.458979360914301e-06, "epoch": 0.3085637503972708, "total_flos": 294726403390709760, "step": 76700 }, { "loss": 5.9175, "learning_rate": 9.458168235518371e-06, "epoch": 0.3089660499414654, "total_flos": 295102210957885440, "step": 76800 }, { "loss": 5.914375, "learning_rate": 9.457357110122441e-06, "epoch": 0.30936834948566005, "total_flos": 295495083546378240, "step": 76900 }, { "loss": 5.90375, "learning_rate": 9.45654598472651e-06, "epoch": 0.30977064902985463, "total_flos": 295881906629959680, "step": 77000 }, { "loss": 5.92625, "learning_rate": 9.45573485933058e-06, "epoch": 0.31017294857404926, "total_flos": 296254729278996480, "step": 77100 }, { "loss": 5.874375, "learning_rate": 9.45492373393465e-06, "epoch": 0.3105752481182439, "total_flos": 296639087946178560, "step": 77200 }, { "loss": 5.85, "learning_rate": 9.454112608538718e-06, "epoch": 0.3109775476624385, "total_flos": 297007757203783680, "step": 77300 }, { "loss": 5.869375, "learning_rate": 9.453301483142788e-06, "epoch": 0.31137984720663314, "total_flos": 297385880472576000, "step": 77400 }, { "loss": 5.9425, "learning_rate": 9.452490357746856e-06, "epoch": 0.3117821467508277, "total_flos": 297764104654970880, "step": 77500 }, { "loss": 5.87125, "learning_rate": 9.451679232350926e-06, "epoch": 0.31218444629502234, "total_flos": 298155166109859840, "step": 77600 }, { "loss": 5.89625, "learning_rate": 9.450868106954996e-06, "epoch": 0.312586745839217, "total_flos": 298559070148485120, "step": 77700 }, { "loss": 5.861875, "learning_rate": 9.450056981559065e-06, "epoch": 0.3129890453834116, "total_flos": 298941256517591040, "step": 77800 }, { "loss": 5.93625, "learning_rate": 9.449245856163135e-06, "epoch": 0.3133913449276062, "total_flos": 299329014379806720, "step": 77900 }, { "loss": 5.809375, "learning_rate": 9.448434730767205e-06, "epoch": 0.3137936444718008, "total_flos": 299705671745740800, "step": 78000 }, { "loss": 5.88, "learning_rate": 9.447623605371273e-06, "epoch": 0.31419594401599543, "total_flos": 300085324652298240, "step": 78100 }, { "loss": 5.868125, "learning_rate": 9.446812479975343e-06, "epoch": 0.31459824356019006, "total_flos": 300460935703511040, "step": 78200 }, { "loss": 5.88875, "learning_rate": 9.446001354579412e-06, "epoch": 0.3150005431043847, "total_flos": 300851078313492480, "step": 78300 }, { "loss": 5.885625, "learning_rate": 9.445190229183482e-06, "epoch": 0.31540284264857926, "total_flos": 301253197774725120, "step": 78400 }, { "loss": 5.82875, "learning_rate": 9.444379103787552e-06, "epoch": 0.3158051421927739, "total_flos": 301635718752092160, "step": 78500 }, { "loss": 5.8975, "learning_rate": 9.44356797839162e-06, "epoch": 0.3162074417369685, "total_flos": 302017538645483520, "step": 78600 }, { "loss": 5.890625, "learning_rate": 9.44275685299569e-06, "epoch": 0.31660974128116315, "total_flos": 302408982509813760, "step": 78700 }, { "loss": 5.88875, "learning_rate": 9.44194572759976e-06, "epoch": 0.3170120408253577, "total_flos": 302804107065016320, "step": 78800 }, { "loss": 5.85125, "learning_rate": 9.441134602203828e-06, "epoch": 0.31741434036955235, "total_flos": 303181465514926080, "step": 78900 }, { "loss": 5.88625, "learning_rate": 9.440323476807898e-06, "epoch": 0.317816639913747, "total_flos": 303554479368683520, "step": 79000 }, { "loss": 5.86, "learning_rate": 9.439512351411967e-06, "epoch": 0.3182189394579416, "total_flos": 303946433112268800, "step": 79100 }, { "loss": 5.855, "learning_rate": 9.438701226016037e-06, "epoch": 0.31862123900213624, "total_flos": 304328359230504960, "step": 79200 }, { "loss": 5.876875, "learning_rate": 9.437890100620107e-06, "epoch": 0.3190235385463308, "total_flos": 304713944794644480, "step": 79300 }, { "loss": 5.845, "learning_rate": 9.437078975224175e-06, "epoch": 0.31942583809052544, "total_flos": 305092827571077120, "step": 79400 }, { "loss": 5.859375, "learning_rate": 9.436267849828245e-06, "epoch": 0.31982813763472007, "total_flos": 305472666371112960, "step": 79500 }, { "loss": 5.913125, "learning_rate": 9.435456724432315e-06, "epoch": 0.3202304371789147, "total_flos": 305865326509916160, "step": 79600 }, { "loss": 5.849375, "learning_rate": 9.434645599036384e-06, "epoch": 0.32063273672310927, "total_flos": 306250524353372160, "step": 79700 }, { "loss": 5.855, "learning_rate": 9.433834473640454e-06, "epoch": 0.3210350362673039, "total_flos": 306637623621550080, "step": 79800 }, { "loss": 5.79125, "learning_rate": 9.433023348244522e-06, "epoch": 0.3214373358114985, "total_flos": 307019762189475840, "step": 79900 }, { "loss": 5.81625, "learning_rate": 9.432212222848592e-06, "epoch": 0.32183963535569315, "total_flos": 307406202863616000, "step": 80000 }, { "loss": 5.825625, "learning_rate": 9.431401097452662e-06, "epoch": 0.3222419348998878, "total_flos": 307792691338936320, "step": 80100 }, { "loss": 5.840625, "learning_rate": 9.43058997205673e-06, "epoch": 0.32264423444408236, "total_flos": 308182897683824640, "step": 80200 }, { "loss": 5.820625, "learning_rate": 9.4297788466608e-06, "epoch": 0.323046533988277, "total_flos": 308549394643353600, "step": 80300 }, { "loss": 5.845625, "learning_rate": 9.42896772126487e-06, "epoch": 0.3234488335324716, "total_flos": 308956209242726400, "step": 80400 }, { "loss": 5.770625, "learning_rate": 9.428156595868939e-06, "epoch": 0.32385113307666624, "total_flos": 309335357581271040, "step": 80500 }, { "loss": 5.81125, "learning_rate": 9.427345470473009e-06, "epoch": 0.3242534326208608, "total_flos": 309707829688320000, "step": 80600 }, { "loss": 5.76875, "learning_rate": 9.426534345077077e-06, "epoch": 0.32465573216505544, "total_flos": 310080227437977600, "step": 80700 }, { "loss": 5.8275, "learning_rate": 9.425723219681147e-06, "epoch": 0.3250580317092501, "total_flos": 310470800258580480, "step": 80800 }, { "loss": 5.75, "learning_rate": 9.424912094285217e-06, "epoch": 0.3254603312534447, "total_flos": 310860491412971520, "step": 80900 }, { "loss": 5.813125, "learning_rate": 9.424100968889286e-06, "epoch": 0.32586263079763933, "total_flos": 311250320659660800, "step": 81000 }, { "loss": 5.819375, "learning_rate": 9.423289843493356e-06, "epoch": 0.3262649303418339, "total_flos": 311636235520819200, "step": 81100 }, { "loss": 5.805, "learning_rate": 9.422478718097426e-06, "epoch": 0.32666722988602853, "total_flos": 312016817894768640, "step": 81200 }, { "loss": 5.77875, "learning_rate": 9.421667592701494e-06, "epoch": 0.32706952943022316, "total_flos": 312404448287170560, "step": 81300 }, { "loss": 5.846875, "learning_rate": 9.420856467305564e-06, "epoch": 0.3274718289744178, "total_flos": 312799286035292160, "step": 81400 }, { "loss": 5.780625, "learning_rate": 9.420045341909634e-06, "epoch": 0.32787412851861236, "total_flos": 313175476011909120, "step": 81500 }, { "loss": 5.766875, "learning_rate": 9.419234216513703e-06, "epoch": 0.328276428062807, "total_flos": 313567254484500480, "step": 81600 }, { "loss": 5.81125, "learning_rate": 9.418423091117773e-06, "epoch": 0.3286787276070016, "total_flos": 313946519670374400, "step": 81700 }, { "loss": 5.810625, "learning_rate": 9.417611965721841e-06, "epoch": 0.32908102715119625, "total_flos": 314327819062026240, "step": 81800 }, { "loss": 5.741875, "learning_rate": 9.416800840325911e-06, "epoch": 0.3294833266953909, "total_flos": 314714249113681920, "step": 81900 }, { "loss": 5.76, "learning_rate": 9.415989714929981e-06, "epoch": 0.32988562623958545, "total_flos": 315087916250234880, "step": 82000 }, { "loss": 5.80125, "learning_rate": 9.41517858953405e-06, "epoch": 0.3302879257837801, "total_flos": 315466857450332160, "step": 82100 }, { "loss": 5.76625, "learning_rate": 9.41436746413812e-06, "epoch": 0.3306902253279747, "total_flos": 315837959256883200, "step": 82200 }, { "loss": 5.733125, "learning_rate": 9.41355633874219e-06, "epoch": 0.33109252487216934, "total_flos": 316212359344865280, "step": 82300 }, { "loss": 5.7425, "learning_rate": 9.412745213346258e-06, "epoch": 0.3314948244163639, "total_flos": 316606039242178560, "step": 82400 }, { "loss": 5.7775, "learning_rate": 9.411934087950328e-06, "epoch": 0.33189712396055854, "total_flos": 316997100697067520, "step": 82500 }, { "loss": 5.800625, "learning_rate": 9.411122962554396e-06, "epoch": 0.33229942350475317, "total_flos": 317373078223994880, "step": 82600 }, { "loss": 5.75, "learning_rate": 9.410311837158466e-06, "epoch": 0.3327017230489478, "total_flos": 317770194495037440, "step": 82700 }, { "loss": 5.7675, "learning_rate": 9.409500711762536e-06, "epoch": 0.3331040225931424, "total_flos": 318161399353466880, "step": 82800 }, { "loss": 5.76, "learning_rate": 9.408689586366605e-06, "epoch": 0.333506322137337, "total_flos": 318548546422824960, "step": 82900 }, { "loss": 5.71, "learning_rate": 9.407878460970675e-06, "epoch": 0.3339086216815316, "total_flos": 318933850491125760, "step": 83000 }, { "loss": 5.750625, "learning_rate": 9.407067335574745e-06, "epoch": 0.33431092122572625, "total_flos": 319313152855695360, "step": 83100 }, { "loss": 5.7625, "learning_rate": 9.406256210178813e-06, "epoch": 0.3347132207699209, "total_flos": 319701569311948800, "step": 83200 }, { "loss": 5.76625, "learning_rate": 9.405445084782883e-06, "epoch": 0.3351155203141155, "total_flos": 320082767789998080, "step": 83300 }, { "loss": 5.791875, "learning_rate": 9.404633959386952e-06, "epoch": 0.3355178198583101, "total_flos": 320455431101767680, "step": 83400 }, { "loss": 5.76875, "learning_rate": 9.403822833991022e-06, "epoch": 0.3359201194025047, "total_flos": 320851506369331200, "step": 83500 }, { "loss": 5.73625, "learning_rate": 9.403011708595092e-06, "epoch": 0.33632241894669934, "total_flos": 321223760715448320, "step": 83600 }, { "loss": 5.683125, "learning_rate": 9.40220058319916e-06, "epoch": 0.33672471849089397, "total_flos": 321607922866667520, "step": 83700 }, { "loss": 5.7575, "learning_rate": 9.40138945780323e-06, "epoch": 0.33712701803508854, "total_flos": 321982636317941760, "step": 83800 }, { "loss": 5.77, "learning_rate": 9.4005783324073e-06, "epoch": 0.3375293175792832, "total_flos": 322380958240972800, "step": 83900 }, { "loss": 5.715, "learning_rate": 9.399767207011368e-06, "epoch": 0.3379316171234778, "total_flos": 322767845059461120, "step": 84000 }, { "loss": 5.734375, "learning_rate": 9.398956081615438e-06, "epoch": 0.33833391666767243, "total_flos": 323166087313858560, "step": 84100 }, { "loss": 5.725625, "learning_rate": 9.398144956219507e-06, "epoch": 0.33873621621186706, "total_flos": 323559953104650240, "step": 84200 }, { "loss": 5.68375, "learning_rate": 9.397333830823577e-06, "epoch": 0.33913851575606163, "total_flos": 323930337893498880, "step": 84300 }, { "loss": 5.706875, "learning_rate": 9.396522705427647e-06, "epoch": 0.33954081530025626, "total_flos": 324308822326763520, "step": 84400 }, { "loss": 5.719375, "learning_rate": 9.395711580031715e-06, "epoch": 0.3399431148444509, "total_flos": 324697663682396160, "step": 84500 }, { "loss": 5.6575, "learning_rate": 9.394900454635785e-06, "epoch": 0.3403454143886455, "total_flos": 325085081625108480, "step": 84600 }, { "loss": 5.71875, "learning_rate": 9.394089329239855e-06, "epoch": 0.3407477139328401, "total_flos": 325464538015703040, "step": 84700 }, { "loss": 5.719375, "learning_rate": 9.393278203843924e-06, "epoch": 0.3411500134770347, "total_flos": 325834848447160320, "step": 84800 }, { "loss": 5.709375, "learning_rate": 9.392467078447994e-06, "epoch": 0.34155231302122935, "total_flos": 326228618635591680, "step": 84900 }, { "loss": 5.67625, "learning_rate": 9.391655953052062e-06, "epoch": 0.341954612565424, "total_flos": 326612100947804160, "step": 85000 }, { "loss": 5.730625, "learning_rate": 9.390844827656132e-06, "epoch": 0.3423569121096186, "total_flos": 327010985862512640, "step": 85100 }, { "loss": 5.72125, "learning_rate": 9.390033702260202e-06, "epoch": 0.3427592116538132, "total_flos": 327392965093171200, "step": 85200 }, { "loss": 5.644375, "learning_rate": 9.38922257686427e-06, "epoch": 0.3431615111980078, "total_flos": 327766510071152640, "step": 85300 }, { "loss": 5.733125, "learning_rate": 9.38841145146834e-06, "epoch": 0.34356381074220244, "total_flos": 328158559417098240, "step": 85400 }, { "loss": 5.668125, "learning_rate": 9.38760032607241e-06, "epoch": 0.34396611028639706, "total_flos": 328537245677568000, "step": 85500 }, { "loss": 5.781875, "learning_rate": 9.386789200676479e-06, "epoch": 0.34436840983059164, "total_flos": 328926315416616960, "step": 85600 }, { "loss": 5.715, "learning_rate": 9.385978075280549e-06, "epoch": 0.34477070937478627, "total_flos": 329316633297592320, "step": 85700 }, { "loss": 5.6575, "learning_rate": 9.385166949884617e-06, "epoch": 0.3451730089189809, "total_flos": 329705835817697280, "step": 85800 }, { "loss": 5.71, "learning_rate": 9.384355824488687e-06, "epoch": 0.3455753084631755, "total_flos": 330086545661460480, "step": 85900 }, { "loss": 5.6675, "learning_rate": 9.383544699092757e-06, "epoch": 0.34597760800737015, "total_flos": 330468981658951680, "step": 86000 }, { "loss": 5.71375, "learning_rate": 9.382733573696826e-06, "epoch": 0.3463799075515647, "total_flos": 330856622673838080, "step": 86100 }, { "loss": 5.63125, "learning_rate": 9.381922448300896e-06, "epoch": 0.34678220709575935, "total_flos": 331225860234362880, "step": 86200 }, { "loss": 5.65125, "learning_rate": 9.381111322904966e-06, "epoch": 0.347184506639954, "total_flos": 331610011763097600, "step": 86300 }, { "loss": 5.670625, "learning_rate": 9.380300197509034e-06, "epoch": 0.3475868061841486, "total_flos": 331978240187596800, "step": 86400 }, { "loss": 5.664375, "learning_rate": 9.379489072113104e-06, "epoch": 0.3479891057283432, "total_flos": 332355487101419520, "step": 86500 }, { "loss": 5.698125, "learning_rate": 9.378677946717174e-06, "epoch": 0.3483914052725378, "total_flos": 332753936494264320, "step": 86600 }, { "loss": 5.69625, "learning_rate": 9.377866821321243e-06, "epoch": 0.34879370481673244, "total_flos": 333148726441205760, "step": 86700 }, { "loss": 5.68625, "learning_rate": 9.377055695925313e-06, "epoch": 0.34919600436092707, "total_flos": 333524810192977920, "step": 86800 }, { "loss": 5.52, "learning_rate": 9.376244570529381e-06, "epoch": 0.3495983039051217, "total_flos": 333900214105743360, "step": 86900 }, { "loss": 5.64375, "learning_rate": 9.375433445133451e-06, "epoch": 0.35000060344931627, "total_flos": 334285364148019200, "step": 87000 }, { "loss": 5.673125, "learning_rate": 9.374622319737521e-06, "epoch": 0.3504029029935109, "total_flos": 334673461929738240, "step": 87100 }, { "loss": 5.653125, "learning_rate": 9.37381119434159e-06, "epoch": 0.35080520253770553, "total_flos": 335045280753991680, "step": 87200 }, { "loss": 5.64, "learning_rate": 9.37300006894566e-06, "epoch": 0.35120750208190016, "total_flos": 335424891170611200, "step": 87300 }, { "loss": 5.640625, "learning_rate": 9.37218894354973e-06, "epoch": 0.35160980162609473, "total_flos": 335811124706304000, "step": 87400 }, { "loss": 5.6675, "learning_rate": 9.371377818153798e-06, "epoch": 0.35201210117028936, "total_flos": 336203800778833920, "step": 87500 }, { "loss": 5.60875, "learning_rate": 9.370566692757868e-06, "epoch": 0.352414400714484, "total_flos": 336600215965900800, "step": 87600 }, { "loss": 5.579375, "learning_rate": 9.369755567361936e-06, "epoch": 0.3528167002586786, "total_flos": 336970420172513280, "step": 87700 }, { "loss": 5.6075, "learning_rate": 9.368944441966006e-06, "epoch": 0.35321899980287325, "total_flos": 337335530897817600, "step": 87800 }, { "loss": 5.616875, "learning_rate": 9.368133316570076e-06, "epoch": 0.3536212993470678, "total_flos": 337705889130455040, "step": 87900 }, { "loss": 5.6375, "learning_rate": 9.367322191174145e-06, "epoch": 0.35402359889126245, "total_flos": 338074951419985920, "step": 88000 }, { "loss": 5.70375, "learning_rate": 9.366511065778215e-06, "epoch": 0.3544258984354571, "total_flos": 338473485792706560, "step": 88100 }, { "loss": 5.67375, "learning_rate": 9.365699940382285e-06, "epoch": 0.3548281979796517, "total_flos": 338848581653422080, "step": 88200 }, { "loss": 5.643125, "learning_rate": 9.364888814986353e-06, "epoch": 0.3552304975238463, "total_flos": 339228882531532800, "step": 88300 }, { "loss": 5.681875, "learning_rate": 9.364077689590423e-06, "epoch": 0.3556327970680409, "total_flos": 339614940796231680, "step": 88400 }, { "loss": 5.584375, "learning_rate": 9.363266564194492e-06, "epoch": 0.35603509661223554, "total_flos": 339989096567070720, "step": 88500 }, { "loss": 5.659375, "learning_rate": 9.362455438798562e-06, "epoch": 0.35643739615643016, "total_flos": 340391003578613760, "step": 88600 }, { "loss": 5.644375, "learning_rate": 9.361644313402632e-06, "epoch": 0.3568396957006248, "total_flos": 340778846420705280, "step": 88700 }, { "loss": 5.624375, "learning_rate": 9.3608331880067e-06, "epoch": 0.35724199524481937, "total_flos": 341154021950054400, "step": 88800 }, { "loss": 5.615, "learning_rate": 9.36002206261077e-06, "epoch": 0.357644294789014, "total_flos": 341549714808176640, "step": 88900 }, { "loss": 5.584375, "learning_rate": 9.35921093721484e-06, "epoch": 0.3580465943332086, "total_flos": 341941891623936000, "step": 89000 }, { "loss": 5.63125, "learning_rate": 9.358399811818908e-06, "epoch": 0.35844889387740325, "total_flos": 342328103914659840, "step": 89100 }, { "loss": 5.568125, "learning_rate": 9.357588686422978e-06, "epoch": 0.3588511934215979, "total_flos": 342714895130787840, "step": 89200 }, { "loss": 5.61375, "learning_rate": 9.356777561027047e-06, "epoch": 0.35925349296579245, "total_flos": 343106105300459520, "step": 89300 }, { "loss": 5.584375, "learning_rate": 9.355966435631117e-06, "epoch": 0.3596557925099871, "total_flos": 343493772871557120, "step": 89400 }, { "loss": 5.586875, "learning_rate": 9.355155310235187e-06, "epoch": 0.3600580920541817, "total_flos": 343870297456435200, "step": 89500 }, { "loss": 5.62, "learning_rate": 9.354344184839255e-06, "epoch": 0.36046039159837634, "total_flos": 344262941661511680, "step": 89600 }, { "loss": 5.55875, "learning_rate": 9.353533059443325e-06, "epoch": 0.3608626911425709, "total_flos": 344656467532800000, "step": 89700 }, { "loss": 5.58, "learning_rate": 9.352721934047395e-06, "epoch": 0.36126499068676554, "total_flos": 345036534716252160, "step": 89800 }, { "loss": 5.584375, "learning_rate": 9.351910808651464e-06, "epoch": 0.36166729023096017, "total_flos": 345409500768829440, "step": 89900 }, { "loss": 5.491875, "learning_rate": 9.351099683255534e-06, "epoch": 0.3620695897751548, "total_flos": 345773756384133120, "step": 90000 }, { "loss": 5.61125, "learning_rate": 9.350288557859602e-06, "epoch": 0.3624718893193494, "total_flos": 346159602199142400, "step": 90100 }, { "loss": 5.5725, "learning_rate": 9.349477432463672e-06, "epoch": 0.362874188863544, "total_flos": 346542643678248960, "step": 90200 }, { "loss": 5.57125, "learning_rate": 9.348666307067742e-06, "epoch": 0.36327648840773863, "total_flos": 346919370090332160, "step": 90300 }, { "loss": 5.5775, "learning_rate": 9.34785518167181e-06, "epoch": 0.36367878795193326, "total_flos": 347309061244723200, "step": 90400 }, { "loss": 5.60875, "learning_rate": 9.34704405627588e-06, "epoch": 0.3640810874961279, "total_flos": 347682011363573760, "step": 90500 }, { "loss": 5.56875, "learning_rate": 9.34623293087995e-06, "epoch": 0.36448338704032246, "total_flos": 348077135918776320, "step": 90600 }, { "loss": 5.568125, "learning_rate": 9.345421805484019e-06, "epoch": 0.3648856865845171, "total_flos": 348465557686272000, "step": 90700 }, { "loss": 5.540625, "learning_rate": 9.344610680088089e-06, "epoch": 0.3652879861287117, "total_flos": 348855190416998400, "step": 90800 }, { "loss": 5.57625, "learning_rate": 9.343799554692159e-06, "epoch": 0.36569028567290635, "total_flos": 349238736464117760, "step": 90900 }, { "loss": 5.569375, "learning_rate": 9.342988429296227e-06, "epoch": 0.366092585217101, "total_flos": 349620104901918720, "step": 91000 }, { "loss": 5.58625, "learning_rate": 9.342177303900297e-06, "epoch": 0.36649488476129555, "total_flos": 349998488421580800, "step": 91100 }, { "loss": 5.556875, "learning_rate": 9.341366178504366e-06, "epoch": 0.3668971843054902, "total_flos": 350389507386531840, "step": 91200 }, { "loss": 5.546875, "learning_rate": 9.340555053108436e-06, "epoch": 0.3672994838496848, "total_flos": 350775385068994560, "step": 91300 }, { "loss": 5.571875, "learning_rate": 9.339743927712506e-06, "epoch": 0.36770178339387943, "total_flos": 351157231518597120, "step": 91400 }, { "loss": 5.531875, "learning_rate": 9.338932802316574e-06, "epoch": 0.368104082938074, "total_flos": 351538536221491200, "step": 91500 }, { "loss": 5.591875, "learning_rate": 9.338121676920644e-06, "epoch": 0.36850638248226864, "total_flos": 351914343788666880, "step": 91600 }, { "loss": 5.544375, "learning_rate": 9.337310551524714e-06, "epoch": 0.36890868202646326, "total_flos": 352283857533788160, "step": 91700 }, { "loss": 5.480625, "learning_rate": 9.336499426128783e-06, "epoch": 0.3693109815706579, "total_flos": 352668125909852160, "step": 91800 }, { "loss": 5.535, "learning_rate": 9.335688300732853e-06, "epoch": 0.3697132811148525, "total_flos": 353039557013422080, "step": 91900 }, { "loss": 5.575625, "learning_rate": 9.334877175336921e-06, "epoch": 0.3701155806590471, "total_flos": 353425461252096000, "step": 92000 }, { "loss": 5.529375, "learning_rate": 9.334066049940991e-06, "epoch": 0.3705178802032417, "total_flos": 353809405642383360, "step": 92100 }, { "loss": 5.53375, "learning_rate": 9.333254924545061e-06, "epoch": 0.37092017974743635, "total_flos": 354191055576023040, "step": 92200 }, { "loss": 5.574375, "learning_rate": 9.33244379914913e-06, "epoch": 0.371322479291631, "total_flos": 354572439947550720, "step": 92300 }, { "loss": 5.555625, "learning_rate": 9.3316326737532e-06, "epoch": 0.37172477883582555, "total_flos": 354953898676469760, "step": 92400 }, { "loss": 5.52, "learning_rate": 9.33082154835727e-06, "epoch": 0.3721270783800202, "total_flos": 355339457684398080, "step": 92500 }, { "loss": 5.4675, "learning_rate": 9.330010422961338e-06, "epoch": 0.3725293779242148, "total_flos": 355726578197544960, "step": 92600 }, { "loss": 5.55625, "learning_rate": 9.329199297565408e-06, "epoch": 0.37293167746840944, "total_flos": 356109481584353280, "step": 92700 }, { "loss": 5.56875, "learning_rate": 9.328388172169476e-06, "epoch": 0.37333397701260407, "total_flos": 356487185265008640, "step": 92800 }, { "loss": 5.534375, "learning_rate": 9.327577046773546e-06, "epoch": 0.37373627655679864, "total_flos": 356872701782999040, "step": 92900 }, { "loss": 5.5525, "learning_rate": 9.326765921377616e-06, "epoch": 0.37413857610099327, "total_flos": 357267343015157760, "step": 93000 }, { "loss": 5.56375, "learning_rate": 9.325954795981685e-06, "epoch": 0.3745408756451879, "total_flos": 357646029275627520, "step": 93100 }, { "loss": 5.520625, "learning_rate": 9.325143670585755e-06, "epoch": 0.3749431751893825, "total_flos": 358013354788945920, "step": 93200 }, { "loss": 5.531875, "learning_rate": 9.324332545189825e-06, "epoch": 0.3753454747335771, "total_flos": 358401617219174400, "step": 93300 }, { "loss": 5.549375, "learning_rate": 9.323521419793893e-06, "epoch": 0.37574777427777173, "total_flos": 358787261206978560, "step": 93400 }, { "loss": 5.5025, "learning_rate": 9.322710294397963e-06, "epoch": 0.37615007382196636, "total_flos": 359171848257576960, "step": 93500 }, { "loss": 5.53375, "learning_rate": 9.321899169002032e-06, "epoch": 0.376552373366161, "total_flos": 359556743360225280, "step": 93600 }, { "loss": 5.525, "learning_rate": 9.321088043606103e-06, "epoch": 0.3769546729103556, "total_flos": 359935705805291520, "step": 93700 }, { "loss": 5.5025, "learning_rate": 9.320276918210172e-06, "epoch": 0.3773569724545502, "total_flos": 360314668250357760, "step": 93800 }, { "loss": 5.485, "learning_rate": 9.31946579281424e-06, "epoch": 0.3777592719987448, "total_flos": 360699414638223360, "step": 93900 }, { "loss": 5.49, "learning_rate": 9.31865466741831e-06, "epoch": 0.37816157154293945, "total_flos": 361070027810488320, "step": 94000 }, { "loss": 5.46875, "learning_rate": 9.31784354202238e-06, "epoch": 0.3785638710871341, "total_flos": 361462454254632960, "step": 94100 }, { "loss": 5.48375, "learning_rate": 9.317032416626448e-06, "epoch": 0.37896617063132865, "total_flos": 361822891086766080, "step": 94200 }, { "loss": 5.4925, "learning_rate": 9.316221291230518e-06, "epoch": 0.3793684701755233, "total_flos": 362205512977735680, "step": 94300 }, { "loss": 5.460625, "learning_rate": 9.315410165834587e-06, "epoch": 0.3797707697197179, "total_flos": 362601365173125120, "step": 94400 }, { "loss": 5.514375, "learning_rate": 9.314599040438659e-06, "epoch": 0.38017306926391253, "total_flos": 362982122818068480, "step": 94500 }, { "loss": 5.5625, "learning_rate": 9.313787915042727e-06, "epoch": 0.38057536880810716, "total_flos": 363363507189596160, "step": 94600 }, { "loss": 5.488125, "learning_rate": 9.312976789646795e-06, "epoch": 0.38097766835230173, "total_flos": 363729249952727040, "step": 94700 }, { "loss": 5.475625, "learning_rate": 9.312165664250865e-06, "epoch": 0.38137996789649636, "total_flos": 364133122123898880, "step": 94800 }, { "loss": 5.52375, "learning_rate": 9.311354538854935e-06, "epoch": 0.381782267440691, "total_flos": 364500877847838720, "step": 94900 }, { "loss": 5.50375, "learning_rate": 9.310543413459004e-06, "epoch": 0.3821845669848856, "total_flos": 364883643142348800, "step": 95000 }, { "loss": 5.546875, "learning_rate": 9.309732288063074e-06, "epoch": 0.38258686652908025, "total_flos": 365283638106685440, "step": 95100 }, { "loss": 5.541875, "learning_rate": 9.308921162667142e-06, "epoch": 0.3829891660732748, "total_flos": 365674710184058880, "step": 95200 }, { "loss": 5.45875, "learning_rate": 9.308110037271214e-06, "epoch": 0.38339146561746945, "total_flos": 366052982167633920, "step": 95300 }, { "loss": 5.486875, "learning_rate": 9.307298911875282e-06, "epoch": 0.3837937651616641, "total_flos": 366441042770657280, "step": 95400 }, { "loss": 5.46, "learning_rate": 9.30648778647935e-06, "epoch": 0.3841960647058587, "total_flos": 366810004146585600, "step": 95500 }, { "loss": 5.5275, "learning_rate": 9.305676661083422e-06, "epoch": 0.3845983642500533, "total_flos": 367180330511769600, "step": 95600 }, { "loss": 5.46625, "learning_rate": 9.30486553568749e-06, "epoch": 0.3850006637942479, "total_flos": 367567259820195840, "step": 95700 }, { "loss": 5.436875, "learning_rate": 9.30405441029156e-06, "epoch": 0.38540296333844254, "total_flos": 367947316381163520, "step": 95800 }, { "loss": 5.38625, "learning_rate": 9.303243284895629e-06, "epoch": 0.38580526288263717, "total_flos": 368330952719400960, "step": 95900 }, { "loss": 5.4475, "learning_rate": 9.302432159499699e-06, "epoch": 0.3862075624268318, "total_flos": 368704152466636800, "step": 96000 }, { "loss": 5.470625, "learning_rate": 9.301621034103769e-06, "epoch": 0.38660986197102637, "total_flos": 369090380691087360, "step": 96100 }, { "loss": 5.45875, "learning_rate": 9.300809908707837e-06, "epoch": 0.387012161515221, "total_flos": 369479168934297600, "step": 96200 }, { "loss": 5.378125, "learning_rate": 9.299998783311906e-06, "epoch": 0.3874144610594156, "total_flos": 369854716250603520, "step": 96300 }, { "loss": 5.463125, "learning_rate": 9.299187657915977e-06, "epoch": 0.38781676060361026, "total_flos": 370235484518031360, "step": 96400 }, { "loss": 5.438125, "learning_rate": 9.298376532520046e-06, "epoch": 0.38821906014780483, "total_flos": 370618483507200000, "step": 96500 }, { "loss": 5.445, "learning_rate": 9.297565407124116e-06, "epoch": 0.38862135969199946, "total_flos": 371000207798231040, "step": 96600 }, { "loss": 5.43875, "learning_rate": 9.296754281728184e-06, "epoch": 0.3890236592361941, "total_flos": 371390265428336640, "step": 96700 }, { "loss": 5.444375, "learning_rate": 9.295943156332254e-06, "epoch": 0.3894259587803887, "total_flos": 371784396781240320, "step": 96800 }, { "loss": 5.454375, "learning_rate": 9.295132030936324e-06, "epoch": 0.38982825832458334, "total_flos": 372158589730775040, "step": 96900 }, { "loss": 5.44375, "learning_rate": 9.294320905540393e-06, "epoch": 0.3902305578687779, "total_flos": 372543723839324160, "step": 97000 }, { "loss": 5.419375, "learning_rate": 9.293509780144461e-06, "epoch": 0.39063285741297255, "total_flos": 372928411803525120, "step": 97100 }, { "loss": 5.384375, "learning_rate": 9.292698654748533e-06, "epoch": 0.3910351569571672, "total_flos": 373322638758789120, "step": 97200 }, { "loss": 5.4125, "learning_rate": 9.291887529352601e-06, "epoch": 0.3914374565013618, "total_flos": 373702844034539520, "step": 97300 }, { "loss": 5.395, "learning_rate": 9.291076403956671e-06, "epoch": 0.3918397560455564, "total_flos": 374094001091788800, "step": 97400 }, { "loss": 5.4225, "learning_rate": 9.29026527856074e-06, "epoch": 0.392242055589751, "total_flos": 374482061694812160, "step": 97500 }, { "loss": 5.459375, "learning_rate": 9.28945415316481e-06, "epoch": 0.39264435513394563, "total_flos": 374866138866155520, "step": 97600 }, { "loss": 5.45375, "learning_rate": 9.28864302776888e-06, "epoch": 0.39304665467814026, "total_flos": 375256026536509440, "step": 97700 }, { "loss": 5.403125, "learning_rate": 9.287831902372948e-06, "epoch": 0.3934489542223349, "total_flos": 375647921856430080, "step": 97800 }, { "loss": 5.336875, "learning_rate": 9.287020776977016e-06, "epoch": 0.39385125376652946, "total_flos": 376023203610624000, "step": 97900 }, { "loss": 5.41375, "learning_rate": 9.286209651581088e-06, "epoch": 0.3942535533107241, "total_flos": 376407790661222400, "step": 98000 }, { "loss": 5.4225, "learning_rate": 9.285398526185156e-06, "epoch": 0.3946558528549187, "total_flos": 376794204779151360, "step": 98100 }, { "loss": 5.5025, "learning_rate": 9.284587400789226e-06, "epoch": 0.39505815239911335, "total_flos": 377175350144778240, "step": 98200 }, { "loss": 5.409375, "learning_rate": 9.283776275393295e-06, "epoch": 0.3954604519433079, "total_flos": 377553499969781760, "step": 98300 }, { "loss": 5.431875, "learning_rate": 9.282965149997365e-06, "epoch": 0.39586275148750255, "total_flos": 377936987593236480, "step": 98400 }, { "loss": 5.38, "learning_rate": 9.282154024601435e-06, "epoch": 0.3962650510316972, "total_flos": 378322435065077760, "step": 98500 }, { "loss": 5.32125, "learning_rate": 9.281342899205503e-06, "epoch": 0.3966673505758918, "total_flos": 378693451891752960, "step": 98600 }, { "loss": 5.398125, "learning_rate": 9.280531773809573e-06, "epoch": 0.39706965012008644, "total_flos": 379061956500848640, "step": 98700 }, { "loss": 5.40375, "learning_rate": 9.279720648413643e-06, "epoch": 0.397471949664281, "total_flos": 379452088488345600, "step": 98800 }, { "loss": 5.36375, "learning_rate": 9.278909523017712e-06, "epoch": 0.39787424920847564, "total_flos": 379832702729748480, "step": 98900 }, { "loss": 5.41375, "learning_rate": 9.278098397621782e-06, "epoch": 0.39827654875267027, "total_flos": 380225809012899840, "step": 99000 }, { "loss": 5.405625, "learning_rate": 9.27728727222585e-06, "epoch": 0.3986788482968649, "total_flos": 380605754037780480, "step": 99100 }, { "loss": 5.368125, "learning_rate": 9.27647614682992e-06, "epoch": 0.39908114784105947, "total_flos": 380974518897745920, "step": 99200 }, { "loss": 5.445625, "learning_rate": 9.27566502143399e-06, "epoch": 0.3994834473852541, "total_flos": 381370641966489600, "step": 99300 }, { "loss": 5.365, "learning_rate": 9.274853896038058e-06, "epoch": 0.3998857469294487, "total_flos": 381756020392181760, "step": 99400 }, { "loss": 5.39, "learning_rate": 9.274042770642128e-06, "epoch": 0.40028804647364336, "total_flos": 382127621455503360, "step": 99500 }, { "loss": 5.32875, "learning_rate": 9.273231645246199e-06, "epoch": 0.400690346017838, "total_flos": 382501910007398400, "step": 99600 }, { "loss": 5.386875, "learning_rate": 9.272420519850267e-06, "epoch": 0.40109264556203256, "total_flos": 382885875642654720, "step": 99700 }, { "loss": 5.401875, "learning_rate": 9.271609394454337e-06, "epoch": 0.4014949451062272, "total_flos": 383270478626979840, "step": 99800 }, { "loss": 5.416875, "learning_rate": 9.270798269058405e-06, "epoch": 0.4018972446504218, "total_flos": 383649680077946880, "step": 99900 }, { "loss": 5.3925, "learning_rate": 9.269987143662475e-06, "epoch": 0.40229954419461644, "total_flos": 384053892168622080, "step": 100000 }, { "loss": 5.386875, "learning_rate": 9.269176018266545e-06, "epoch": 0.40270184373881107, "total_flos": 384447173722767360, "step": 100100 }, { "loss": 5.39875, "learning_rate": 9.268364892870614e-06, "epoch": 0.40310414328300564, "total_flos": 384821557877022720, "step": 100200 }, { "loss": 5.3975, "learning_rate": 9.267553767474684e-06, "epoch": 0.4035064428272003, "total_flos": 385189350779658240, "step": 100300 }, { "loss": 5.349375, "learning_rate": 9.266742642078754e-06, "epoch": 0.4039087423713949, "total_flos": 385564669712547840, "step": 100400 }, { "loss": 5.409375, "learning_rate": 9.265931516682822e-06, "epoch": 0.40431104191558953, "total_flos": 385973922172108800, "step": 100500 }, { "loss": 5.33375, "learning_rate": 9.265120391286892e-06, "epoch": 0.4047133414597841, "total_flos": 386358796029788160, "step": 100600 }, { "loss": 5.365625, "learning_rate": 9.264309265890962e-06, "epoch": 0.40511564100397873, "total_flos": 386739049106718720, "step": 100700 }, { "loss": 5.349375, "learning_rate": 9.26349814049503e-06, "epoch": 0.40551794054817336, "total_flos": 387114777005260800, "step": 100800 }, { "loss": 5.358125, "learning_rate": 9.2626870150991e-06, "epoch": 0.405920240092368, "total_flos": 387509136741580800, "step": 100900 }, { "loss": 5.3125, "learning_rate": 9.261875889703169e-06, "epoch": 0.4063225396365626, "total_flos": 387873647296512000, "step": 101000 }, { "loss": 5.364375, "learning_rate": 9.261064764307239e-06, "epoch": 0.4067248391807572, "total_flos": 388228178027274240, "step": 101100 }, { "loss": 5.3175, "learning_rate": 9.260253638911309e-06, "epoch": 0.4071271387249518, "total_flos": 388622739590799360, "step": 101200 }, { "loss": 5.3825, "learning_rate": 9.259442513515377e-06, "epoch": 0.40752943826914645, "total_flos": 388990075726602240, "step": 101300 }, { "loss": 5.38, "learning_rate": 9.258631388119447e-06, "epoch": 0.4079317378133411, "total_flos": 389370047307694080, "step": 101400 }, { "loss": 5.38, "learning_rate": 9.257820262723517e-06, "epoch": 0.40833403735753565, "total_flos": 389764231773020160, "step": 101500 }, { "loss": 5.2925, "learning_rate": 9.257009137327586e-06, "epoch": 0.4087363369017303, "total_flos": 390132130900500480, "step": 101600 }, { "loss": 5.301875, "learning_rate": 9.256198011931656e-06, "epoch": 0.4091386364459249, "total_flos": 390533150934589440, "step": 101700 }, { "loss": 5.3125, "learning_rate": 9.255386886535724e-06, "epoch": 0.40954093599011954, "total_flos": 390907593512509440, "step": 101800 }, { "loss": 5.34125, "learning_rate": 9.254575761139794e-06, "epoch": 0.40994323553431417, "total_flos": 391289705524224000, "step": 101900 }, { "loss": 5.400625, "learning_rate": 9.253764635743864e-06, "epoch": 0.41034553507850874, "total_flos": 391663138966118400, "step": 102000 }, { "loss": 5.36875, "learning_rate": 9.252953510347933e-06, "epoch": 0.41074783462270337, "total_flos": 392049117562183680, "step": 102100 }, { "loss": 5.294375, "learning_rate": 9.252142384952003e-06, "epoch": 0.411150134166898, "total_flos": 392441172219371520, "step": 102200 }, { "loss": 5.3025, "learning_rate": 9.251331259556073e-06, "epoch": 0.4115524337110926, "total_flos": 392824808557608960, "step": 102300 }, { "loss": 5.29, "learning_rate": 9.250520134160141e-06, "epoch": 0.4119547332552872, "total_flos": 393204727026278400, "step": 102400 }, { "loss": 5.371875, "learning_rate": 9.249709008764211e-06, "epoch": 0.4123570327994818, "total_flos": 393594306644582400, "step": 102500 }, { "loss": 5.27875, "learning_rate": 9.24889788336828e-06, "epoch": 0.41275933234367645, "total_flos": 393979637269094400, "step": 102600 }, { "loss": 5.34125, "learning_rate": 9.24808675797235e-06, "epoch": 0.4131616318878711, "total_flos": 394348062209556480, "step": 102700 }, { "loss": 5.341875, "learning_rate": 9.24727563257642e-06, "epoch": 0.4135639314320657, "total_flos": 394730036128972800, "step": 102800 }, { "loss": 5.310625, "learning_rate": 9.246464507180488e-06, "epoch": 0.4139662309762603, "total_flos": 395111547970314240, "step": 102900 }, { "loss": 5.298125, "learning_rate": 9.245653381784558e-06, "epoch": 0.4143685305204549, "total_flos": 395490967182213120, "step": 103000 }, { "loss": 5.324375, "learning_rate": 9.244842256388628e-06, "epoch": 0.41477083006464954, "total_flos": 395873068571443200, "step": 103100 }, { "loss": 5.3375, "learning_rate": 9.244031130992696e-06, "epoch": 0.41517312960884417, "total_flos": 396251807944335360, "step": 103200 }, { "loss": 5.311875, "learning_rate": 9.243220005596766e-06, "epoch": 0.41557542915303874, "total_flos": 396616382234173440, "step": 103300 }, { "loss": 5.350625, "learning_rate": 9.242408880200835e-06, "epoch": 0.4159777286972334, "total_flos": 396989597915136000, "step": 103400 }, { "loss": 5.333125, "learning_rate": 9.241597754804905e-06, "epoch": 0.416380028241428, "total_flos": 397361634500321280, "step": 103500 }, { "loss": 5.26625, "learning_rate": 9.240786629408975e-06, "epoch": 0.41678232778562263, "total_flos": 397751814288998400, "step": 103600 }, { "loss": 5.32375, "learning_rate": 9.239975504013043e-06, "epoch": 0.41718462732981726, "total_flos": 398139184430530560, "step": 103700 }, { "loss": 5.259375, "learning_rate": 9.239164378617113e-06, "epoch": 0.41758692687401183, "total_flos": 398521939102556160, "step": 103800 }, { "loss": 5.295, "learning_rate": 9.238353253221183e-06, "epoch": 0.41798922641820646, "total_flos": 398891808700907520, "step": 103900 }, { "loss": 5.308125, "learning_rate": 9.237542127825252e-06, "epoch": 0.4183915259624011, "total_flos": 399273846355230720, "step": 104000 }, { "loss": 5.286875, "learning_rate": 9.236731002429322e-06, "epoch": 0.4187938255065957, "total_flos": 399660154248314880, "step": 104100 }, { "loss": 5.3025, "learning_rate": 9.23591987703339e-06, "epoch": 0.4191961250507903, "total_flos": 400043790586552320, "step": 104200 }, { "loss": 5.28, "learning_rate": 9.23510875163746e-06, "epoch": 0.4195984245949849, "total_flos": 400430486200320000, "step": 104300 }, { "loss": 5.328125, "learning_rate": 9.23429762624153e-06, "epoch": 0.42000072413917955, "total_flos": 400807605644328960, "step": 104400 }, { "loss": 5.30375, "learning_rate": 9.233486500845598e-06, "epoch": 0.4204030236833742, "total_flos": 401197041859092480, "step": 104500 }, { "loss": 5.320625, "learning_rate": 9.232675375449668e-06, "epoch": 0.4208053232275688, "total_flos": 401588517590876160, "step": 104600 }, { "loss": 5.2375, "learning_rate": 9.231864250053739e-06, "epoch": 0.4212076227717634, "total_flos": 401950526550712320, "step": 104700 }, { "loss": 5.3675, "learning_rate": 9.231053124657807e-06, "epoch": 0.421609922315958, "total_flos": 402347191366164480, "step": 104800 }, { "loss": 5.276875, "learning_rate": 9.230241999261877e-06, "epoch": 0.42201222186015264, "total_flos": 402734030383472640, "step": 104900 }, { "loss": 5.24125, "learning_rate": 9.229430873865947e-06, "epoch": 0.42241452140434727, "total_flos": 403115096080465920, "step": 105000 }, { "loss": 5.328125, "learning_rate": 9.228619748470015e-06, "epoch": 0.42281682094854184, "total_flos": 403504080839639040, "step": 105100 }, { "loss": 5.25625, "learning_rate": 9.227808623074085e-06, "epoch": 0.42321912049273647, "total_flos": 403892454805954560, "step": 105200 }, { "loss": 5.31625, "learning_rate": 9.226997497678154e-06, "epoch": 0.4236214200369311, "total_flos": 404286288729292800, "step": 105300 }, { "loss": 5.255625, "learning_rate": 9.226186372282224e-06, "epoch": 0.4240237195811257, "total_flos": 404673648248340480, "step": 105400 }, { "loss": 5.241875, "learning_rate": 9.225375246886294e-06, "epoch": 0.42442601912532035, "total_flos": 405055377850613760, "step": 105500 }, { "loss": 5.22, "learning_rate": 9.224564121490362e-06, "epoch": 0.4248283186695149, "total_flos": 405456456308367360, "step": 105600 }, { "loss": 5.32625, "learning_rate": 9.223752996094432e-06, "epoch": 0.42523061821370955, "total_flos": 405845786298286080, "step": 105700 }, { "loss": 5.285625, "learning_rate": 9.222941870698502e-06, "epoch": 0.4256329177579042, "total_flos": 406207476583587840, "step": 105800 }, { "loss": 5.313125, "learning_rate": 9.22213074530257e-06, "epoch": 0.4260352173020988, "total_flos": 406597656372264960, "step": 105900 }, { "loss": 5.231875, "learning_rate": 9.22131961990664e-06, "epoch": 0.42643751684629344, "total_flos": 406986582707773440, "step": 106000 }, { "loss": 5.2725, "learning_rate": 9.220508494510709e-06, "epoch": 0.426839816390488, "total_flos": 407363091358924800, "step": 106100 }, { "loss": 5.214375, "learning_rate": 9.219697369114779e-06, "epoch": 0.42724211593468264, "total_flos": 407732403276840960, "step": 106200 }, { "loss": 5.253125, "learning_rate": 9.218886243718849e-06, "epoch": 0.42764441547887727, "total_flos": 408115258862469120, "step": 106300 }, { "loss": 5.29, "learning_rate": 9.218075118322917e-06, "epoch": 0.4280467150230719, "total_flos": 408518509618298880, "step": 106400 }, { "loss": 5.31875, "learning_rate": 9.217263992926987e-06, "epoch": 0.4284490145672665, "total_flos": 408906591466291200, "step": 106500 }, { "loss": 5.2625, "learning_rate": 9.216452867531057e-06, "epoch": 0.4288513141114611, "total_flos": 409297105863229440, "step": 106600 }, { "loss": 5.270625, "learning_rate": 9.215641742135126e-06, "epoch": 0.42925361365565573, "total_flos": 409667086997667840, "step": 106700 }, { "loss": 5.253125, "learning_rate": 9.214830616739196e-06, "epoch": 0.42965591319985036, "total_flos": 410049140585717760, "step": 106800 }, { "loss": 5.255625, "learning_rate": 9.214019491343264e-06, "epoch": 0.430058212744045, "total_flos": 410428644777492480, "step": 106900 }, { "loss": 5.27625, "learning_rate": 9.213208365947334e-06, "epoch": 0.43046051228823956, "total_flos": 410810724921753600, "step": 107000 }, { "loss": 5.241875, "learning_rate": 9.212397240551404e-06, "epoch": 0.4308628118324342, "total_flos": 411183308564889600, "step": 107100 }, { "loss": 5.295625, "learning_rate": 9.211586115155473e-06, "epoch": 0.4312651113766288, "total_flos": 411568803837911040, "step": 107200 }, { "loss": 5.256875, "learning_rate": 9.210774989759543e-06, "epoch": 0.43166741092082345, "total_flos": 411962398755348480, "step": 107300 }, { "loss": 5.248125, "learning_rate": 9.209963864363613e-06, "epoch": 0.432069710465018, "total_flos": 412339326994636800, "step": 107400 }, { "loss": 5.301875, "learning_rate": 9.209152738967681e-06, "epoch": 0.43247201000921265, "total_flos": 412714406921625600, "step": 107500 }, { "loss": 5.22, "learning_rate": 9.208341613571751e-06, "epoch": 0.4328743095534073, "total_flos": 413101245938933760, "step": 107600 }, { "loss": 5.246875, "learning_rate": 9.20753048817582e-06, "epoch": 0.4332766090976019, "total_flos": 413489232184565760, "step": 107700 }, { "loss": 5.27125, "learning_rate": 9.20671936277989e-06, "epoch": 0.43367890864179653, "total_flos": 413883645033308160, "step": 107800 }, { "loss": 5.234375, "learning_rate": 9.20590823738396e-06, "epoch": 0.4340812081859911, "total_flos": 414283390369259520, "step": 107900 }, { "loss": 5.244375, "learning_rate": 9.205097111988028e-06, "epoch": 0.43448350773018574, "total_flos": 414672279526072320, "step": 108000 }, { "loss": 5.26, "learning_rate": 9.204285986592098e-06, "epoch": 0.43488580727438036, "total_flos": 415073825373143040, "step": 108100 }, { "loss": 5.205, "learning_rate": 9.203474861196168e-06, "epoch": 0.435288106818575, "total_flos": 415445575151247360, "step": 108200 }, { "loss": 5.205625, "learning_rate": 9.202663735800236e-06, "epoch": 0.43569040636276957, "total_flos": 415813591126056960, "step": 108300 }, { "loss": 5.2075, "learning_rate": 9.201852610404306e-06, "epoch": 0.4360927059069642, "total_flos": 416204610091008000, "step": 108400 }, { "loss": 5.225, "learning_rate": 9.201041485008375e-06, "epoch": 0.4364950054511588, "total_flos": 416584406401105920, "step": 108500 }, { "loss": 5.163125, "learning_rate": 9.200230359612445e-06, "epoch": 0.43689730499535345, "total_flos": 416962805854494720, "step": 108600 }, { "loss": 5.24125, "learning_rate": 9.199419234216515e-06, "epoch": 0.4372996045395481, "total_flos": 417353091868016640, "step": 108700 }, { "loss": 5.183125, "learning_rate": 9.198608108820583e-06, "epoch": 0.43770190408374265, "total_flos": 417742289076879360, "step": 108800 }, { "loss": 5.204375, "learning_rate": 9.197796983424653e-06, "epoch": 0.4381042036279373, "total_flos": 418121676421324800, "step": 108900 }, { "loss": 5.1525, "learning_rate": 9.196985858028723e-06, "epoch": 0.4385065031721319, "total_flos": 418505817327575040, "step": 109000 }, { "loss": 5.245625, "learning_rate": 9.196174732632792e-06, "epoch": 0.43890880271632654, "total_flos": 418885502101585920, "step": 109100 }, { "loss": 5.17125, "learning_rate": 9.195363607236862e-06, "epoch": 0.4393111022605211, "total_flos": 419279920261570560, "step": 109200 }, { "loss": 5.21, "learning_rate": 9.19455248184093e-06, "epoch": 0.43971340180471574, "total_flos": 419691334396723200, "step": 109300 }, { "loss": 5.270625, "learning_rate": 9.193741356445e-06, "epoch": 0.44011570134891037, "total_flos": 420078667359559680, "step": 109400 }, { "loss": 5.23875, "learning_rate": 9.19293023104907e-06, "epoch": 0.440518000893105, "total_flos": 420459058528788480, "step": 109500 }, { "loss": 5.18, "learning_rate": 9.192119105653138e-06, "epoch": 0.44092030043729963, "total_flos": 420852531287654400, "step": 109600 }, { "loss": 5.26, "learning_rate": 9.191307980257208e-06, "epoch": 0.4413225999814942, "total_flos": 421237357344153600, "step": 109700 }, { "loss": 5.246875, "learning_rate": 9.190496854861279e-06, "epoch": 0.44172489952568883, "total_flos": 421630224621404160, "step": 109800 }, { "loss": 5.21, "learning_rate": 9.189685729465347e-06, "epoch": 0.44212719906988346, "total_flos": 422018540164055040, "step": 109900 }, { "loss": 5.22875, "learning_rate": 9.188874604069417e-06, "epoch": 0.4425294986140781, "total_flos": 422395319688560640, "step": 110000 }, { "loss": 5.16, "learning_rate": 9.188063478673487e-06, "epoch": 0.44293179815827266, "total_flos": 422764254508277760, "step": 110100 }, { "loss": 5.148125, "learning_rate": 9.187252353277555e-06, "epoch": 0.4433340977024673, "total_flos": 423142298108436480, "step": 110200 }, { "loss": 5.155625, "learning_rate": 9.186441227881625e-06, "epoch": 0.4437363972466619, "total_flos": 423528011142389760, "step": 110300 }, { "loss": 5.185625, "learning_rate": 9.185630102485694e-06, "epoch": 0.44413869679085655, "total_flos": 423921728218398720, "step": 110400 }, { "loss": 5.195, "learning_rate": 9.184818977089764e-06, "epoch": 0.4445409963350512, "total_flos": 424295368798740480, "step": 110500 }, { "loss": 5.200625, "learning_rate": 9.184007851693834e-06, "epoch": 0.44494329587924575, "total_flos": 424686892331704320, "step": 110600 }, { "loss": 5.140625, "learning_rate": 9.183196726297902e-06, "epoch": 0.4453455954234404, "total_flos": 425055280093470720, "step": 110700 }, { "loss": 5.245, "learning_rate": 9.182385600901972e-06, "epoch": 0.445747894967635, "total_flos": 425436712266178560, "step": 110800 }, { "loss": 5.128125, "learning_rate": 9.181574475506042e-06, "epoch": 0.44615019451182963, "total_flos": 425823758421934080, "step": 110900 }, { "loss": 5.220625, "learning_rate": 9.18076335011011e-06, "epoch": 0.4465524940560242, "total_flos": 426228034247516160, "step": 111000 }, { "loss": 5.1975, "learning_rate": 9.17995222471418e-06, "epoch": 0.44695479360021884, "total_flos": 426619701184020480, "step": 111100 }, { "loss": 5.1625, "learning_rate": 9.179141099318249e-06, "epoch": 0.44735709314441346, "total_flos": 426969956364779520, "step": 111200 }, { "loss": 5.135, "learning_rate": 9.178329973922319e-06, "epoch": 0.4477593926886081, "total_flos": 427349556158914560, "step": 111300 }, { "loss": 5.163125, "learning_rate": 9.177518848526389e-06, "epoch": 0.4481616922328027, "total_flos": 427733293410754560, "step": 111400 }, { "loss": 5.18, "learning_rate": 9.176707723130457e-06, "epoch": 0.4485639917769973, "total_flos": 428113450885324800, "step": 111500 }, { "loss": 5.12875, "learning_rate": 9.175896597734527e-06, "epoch": 0.4489662913211919, "total_flos": 428474386974228480, "step": 111600 }, { "loss": 5.189375, "learning_rate": 9.175085472338597e-06, "epoch": 0.44936859086538655, "total_flos": 428856807037992960, "step": 111700 }, { "loss": 5.186875, "learning_rate": 9.174274346942666e-06, "epoch": 0.4497708904095812, "total_flos": 429239667934863360, "step": 111800 }, { "loss": 5.15125, "learning_rate": 9.173463221546736e-06, "epoch": 0.4501731899537758, "total_flos": 429627526710681600, "step": 111900 }, { "loss": 5.175, "learning_rate": 9.172652096150804e-06, "epoch": 0.4505754894979704, "total_flos": 430014339171778560, "step": 112000 }, { "loss": 5.115625, "learning_rate": 9.171840970754874e-06, "epoch": 0.450977789042165, "total_flos": 430383746692055040, "step": 112100 }, { "loss": 5.21125, "learning_rate": 9.171029845358944e-06, "epoch": 0.45138008858635964, "total_flos": 430770012095201280, "step": 112200 }, { "loss": 5.111875, "learning_rate": 9.170218719963013e-06, "epoch": 0.45178238813055427, "total_flos": 431142983459020800, "step": 112300 }, { "loss": 5.1575, "learning_rate": 9.169407594567083e-06, "epoch": 0.45218468767474884, "total_flos": 431530332355584000, "step": 112400 }, { "loss": 5.16875, "learning_rate": 9.168596469171153e-06, "epoch": 0.45258698721894347, "total_flos": 431906702914437120, "step": 112500 }, { "loss": 5.169375, "learning_rate": 9.167785343775221e-06, "epoch": 0.4529892867631381, "total_flos": 432284884606894080, "step": 112600 }, { "loss": 5.173125, "learning_rate": 9.166974218379291e-06, "epoch": 0.45339158630733273, "total_flos": 432675468049981440, "step": 112700 }, { "loss": 5.1425, "learning_rate": 9.16616309298336e-06, "epoch": 0.45379388585152736, "total_flos": 433045905951252480, "step": 112800 }, { "loss": 5.17, "learning_rate": 9.16535196758743e-06, "epoch": 0.45419618539572193, "total_flos": 433441412915896320, "step": 112900 }, { "loss": 5.138125, "learning_rate": 9.1645408421915e-06, "epoch": 0.45459848493991656, "total_flos": 433830530456125440, "step": 113000 }, { "loss": 5.074375, "learning_rate": 9.163729716795568e-06, "epoch": 0.4550007844841112, "total_flos": 434216519674675200, "step": 113100 }, { "loss": 5.13875, "learning_rate": 9.162918591399638e-06, "epoch": 0.4554030840283058, "total_flos": 434598976917135360, "step": 113200 }, { "loss": 5.145625, "learning_rate": 9.162107466003708e-06, "epoch": 0.4558053835725004, "total_flos": 434980690585681920, "step": 113300 }, { "loss": 5.13625, "learning_rate": 9.161296340607776e-06, "epoch": 0.456207683116695, "total_flos": 435357422309007360, "step": 113400 }, { "loss": 5.211875, "learning_rate": 9.160485215211846e-06, "epoch": 0.45660998266088965, "total_flos": 435771279615590400, "step": 113500 }, { "loss": 5.145, "learning_rate": 9.159674089815915e-06, "epoch": 0.4570122822050843, "total_flos": 436152196597800960, "step": 113600 }, { "loss": 5.129375, "learning_rate": 9.158862964419985e-06, "epoch": 0.4574145817492789, "total_flos": 436543135894118400, "step": 113700 }, { "loss": 5.08375, "learning_rate": 9.158051839024055e-06, "epoch": 0.4578168812934735, "total_flos": 436914906917191680, "step": 113800 }, { "loss": 5.151875, "learning_rate": 9.157240713628123e-06, "epoch": 0.4582191808376681, "total_flos": 437300588083691520, "step": 113900 }, { "loss": 5.10625, "learning_rate": 9.156429588232193e-06, "epoch": 0.45862148038186273, "total_flos": 437672008564776960, "step": 114000 }, { "loss": 5.065, "learning_rate": 9.155618462836263e-06, "epoch": 0.45902377992605736, "total_flos": 438041761315799040, "step": 114100 }, { "loss": 5.0925, "learning_rate": 9.154807337440332e-06, "epoch": 0.45942607947025194, "total_flos": 438429689137766400, "step": 114200 }, { "loss": 5.113125, "learning_rate": 9.153996212044402e-06, "epoch": 0.45982837901444656, "total_flos": 438815720846254080, "step": 114300 }, { "loss": 5.101875, "learning_rate": 9.153185086648472e-06, "epoch": 0.4602306785586412, "total_flos": 439186211859947520, "step": 114400 }, { "loss": 5.0875, "learning_rate": 9.15237396125254e-06, "epoch": 0.4606329781028358, "total_flos": 439568042375823360, "step": 114500 }, { "loss": 5.135, "learning_rate": 9.15156283585661e-06, "epoch": 0.46103527764703045, "total_flos": 439954807035740160, "step": 114600 }, { "loss": 5.106875, "learning_rate": 9.150751710460678e-06, "epoch": 0.461437577191225, "total_flos": 440353166137466880, "step": 114700 }, { "loss": 5.099375, "learning_rate": 9.14994058506475e-06, "epoch": 0.46183987673541965, "total_flos": 440737662896947200, "step": 114800 }, { "loss": 5.1325, "learning_rate": 9.149129459668819e-06, "epoch": 0.4622421762796143, "total_flos": 441123344063447040, "step": 114900 }, { "loss": 5.119375, "learning_rate": 9.148318334272887e-06, "epoch": 0.4626444758238089, "total_flos": 441511484335104000, "step": 115000 }, { "loss": 5.06, "learning_rate": 9.147507208876957e-06, "epoch": 0.4630467753680035, "total_flos": 441877030582272000, "step": 115100 }, { "loss": 5.025625, "learning_rate": 9.146696083481027e-06, "epoch": 0.4634490749121981, "total_flos": 442251260710502400, "step": 115200 }, { "loss": 5.0475, "learning_rate": 9.145884958085095e-06, "epoch": 0.46385137445639274, "total_flos": 442618649958727680, "step": 115300 }, { "loss": 5.133125, "learning_rate": 9.145073832689165e-06, "epoch": 0.46425367400058737, "total_flos": 443004957851811840, "step": 115400 }, { "loss": 5.145, "learning_rate": 9.144262707293234e-06, "epoch": 0.464655973544782, "total_flos": 443407220716584960, "step": 115500 }, { "loss": 5.05875, "learning_rate": 9.143451581897305e-06, "epoch": 0.46505827308897657, "total_flos": 443787234787614720, "step": 115600 }, { "loss": 5.1275, "learning_rate": 9.142640456501374e-06, "epoch": 0.4654605726331712, "total_flos": 444154358473728000, "step": 115700 }, { "loss": 5.016875, "learning_rate": 9.141829331105442e-06, "epoch": 0.4658628721773658, "total_flos": 444527637889597440, "step": 115800 }, { "loss": 5.065625, "learning_rate": 9.141018205709512e-06, "epoch": 0.46626517172156046, "total_flos": 444912347098767360, "step": 115900 }, { "loss": 5.1575, "learning_rate": 9.140207080313582e-06, "epoch": 0.46666747126575503, "total_flos": 445302627801047040, "step": 116000 }, { "loss": 5.054375, "learning_rate": 9.139395954917652e-06, "epoch": 0.46706977080994966, "total_flos": 445674919325859840, "step": 116100 }, { "loss": 5.1, "learning_rate": 9.13858482952172e-06, "epoch": 0.4674720703541443, "total_flos": 446063218934784000, "step": 116200 }, { "loss": 5.094375, "learning_rate": 9.137773704125789e-06, "epoch": 0.4678743698983389, "total_flos": 446431882881146880, "step": 116300 }, { "loss": 5.044375, "learning_rate": 9.13696257872986e-06, "epoch": 0.46827666944253354, "total_flos": 446829758659829760, "step": 116400 }, { "loss": 5.0525, "learning_rate": 9.136151453333929e-06, "epoch": 0.4686789689867281, "total_flos": 447211477639618560, "step": 116500 }, { "loss": 5.035625, "learning_rate": 9.135340327937997e-06, "epoch": 0.46908126853092275, "total_flos": 447599315170467840, "step": 116600 }, { "loss": 5.088125, "learning_rate": 9.134529202542067e-06, "epoch": 0.4694835680751174, "total_flos": 447965875864903680, "step": 116700 }, { "loss": 5.0575, "learning_rate": 9.133718077146137e-06, "epoch": 0.469885867619312, "total_flos": 448351886328422400, "step": 116800 }, { "loss": 5.06875, "learning_rate": 9.132906951750208e-06, "epoch": 0.4702881671635066, "total_flos": 448727571737026560, "step": 116900 }, { "loss": 5.0325, "learning_rate": 9.132095826354276e-06, "epoch": 0.4706904667077012, "total_flos": 449110066158182400, "step": 117000 }, { "loss": 5.080625, "learning_rate": 9.131284700958344e-06, "epoch": 0.47109276625189583, "total_flos": 449495667656048640, "step": 117100 }, { "loss": 5.0975, "learning_rate": 9.130473575562416e-06, "epoch": 0.47149506579609046, "total_flos": 449883446763233280, "step": 117200 }, { "loss": 5.118125, "learning_rate": 9.129662450166484e-06, "epoch": 0.4718973653402851, "total_flos": 450270965619548160, "step": 117300 }, { "loss": 5.018125, "learning_rate": 9.128851324770553e-06, "epoch": 0.47229966488447966, "total_flos": 450637770631127040, "step": 117400 }, { "loss": 5.070625, "learning_rate": 9.128040199374623e-06, "epoch": 0.4727019644286743, "total_flos": 451040947029565440, "step": 117500 }, { "loss": 5.08625, "learning_rate": 9.127229073978693e-06, "epoch": 0.4731042639728689, "total_flos": 451424461209231360, "step": 117600 }, { "loss": 5.063125, "learning_rate": 9.126417948582763e-06, "epoch": 0.47350656351706355, "total_flos": 451818396046172160, "step": 117700 }, { "loss": 5.055625, "learning_rate": 9.125606823186831e-06, "epoch": 0.4739088630612582, "total_flos": 452202882183168000, "step": 117800 }, { "loss": 5.08125, "learning_rate": 9.1247956977909e-06, "epoch": 0.47431116260545275, "total_flos": 452585663411404800, "step": 117900 }, { "loss": 5.06625, "learning_rate": 9.123984572394971e-06, "epoch": 0.4747134621496474, "total_flos": 452958475437957120, "step": 118000 }, { "loss": 5.045, "learning_rate": 9.12317344699904e-06, "epoch": 0.475115761693842, "total_flos": 453328775246929920, "step": 118100 }, { "loss": 5.005, "learning_rate": 9.12236232160311e-06, "epoch": 0.47551806123803664, "total_flos": 453695027889315840, "step": 118200 }, { "loss": 5.056875, "learning_rate": 9.121551196207178e-06, "epoch": 0.4759203607822312, "total_flos": 454083832066252800, "step": 118300 }, { "loss": 5.01625, "learning_rate": 9.120740070811248e-06, "epoch": 0.47632266032642584, "total_flos": 454473921563811840, "step": 118400 }, { "loss": 5.02125, "learning_rate": 9.119928945415318e-06, "epoch": 0.47672495987062047, "total_flos": 454867399633920000, "step": 118500 }, { "loss": 5.045, "learning_rate": 9.119117820019386e-06, "epoch": 0.4771272594148151, "total_flos": 455257372284149760, "step": 118600 }, { "loss": 5.039375, "learning_rate": 9.118306694623455e-06, "epoch": 0.4775295589590097, "total_flos": 455642511703941120, "step": 118700 }, { "loss": 5.12875, "learning_rate": 9.117495569227526e-06, "epoch": 0.4779318585032043, "total_flos": 456032972988456960, "step": 118800 }, { "loss": 5.021875, "learning_rate": 9.116684443831595e-06, "epoch": 0.4783341580473989, "total_flos": 456403336532336640, "step": 118900 }, { "loss": 5.035625, "learning_rate": 9.115873318435665e-06, "epoch": 0.47873645759159356, "total_flos": 456798716027166720, "step": 119000 }, { "loss": 4.994375, "learning_rate": 9.115062193039735e-06, "epoch": 0.4791387571357882, "total_flos": 457173509147074560, "step": 119100 }, { "loss": 5.039375, "learning_rate": 9.114251067643803e-06, "epoch": 0.47954105667998276, "total_flos": 457542528946667520, "step": 119200 }, { "loss": 5.01125, "learning_rate": 9.113439942247873e-06, "epoch": 0.4799433562241774, "total_flos": 457929654771056640, "step": 119300 }, { "loss": 5.078125, "learning_rate": 9.112628816851942e-06, "epoch": 0.480345655768372, "total_flos": 458307847085998080, "step": 119400 }, { "loss": 5.05625, "learning_rate": 9.111817691456012e-06, "epoch": 0.48074795531256664, "total_flos": 458693384848957440, "step": 119500 }, { "loss": 4.9975, "learning_rate": 9.111006566060082e-06, "epoch": 0.4811502548567613, "total_flos": 459069569514332160, "step": 119600 }, { "loss": 4.99, "learning_rate": 9.11019544066415e-06, "epoch": 0.48155255440095585, "total_flos": 459432183955783680, "step": 119700 }, { "loss": 5.054375, "learning_rate": 9.10938431526822e-06, "epoch": 0.4819548539451505, "total_flos": 459835243506892800, "step": 119800 }, { "loss": 5.0725, "learning_rate": 9.10857318987229e-06, "epoch": 0.4823571534893451, "total_flos": 460230792961474560, "step": 119900 }, { "loss": 5.019375, "learning_rate": 9.107762064476359e-06, "epoch": 0.48275945303353973, "total_flos": 460601230862745600, "step": 120000 }, { "loss": 5.08875, "learning_rate": 9.106950939080429e-06, "epoch": 0.4831617525777343, "total_flos": 461002181850685440, "step": 120100 }, { "loss": 5.003125, "learning_rate": 9.106139813684497e-06, "epoch": 0.48356405212192893, "total_flos": 461390757644206080, "step": 120200 }, { "loss": 5.004375, "learning_rate": 9.105328688288567e-06, "epoch": 0.48396635166612356, "total_flos": 461768663152066560, "step": 120300 }, { "loss": 4.98375, "learning_rate": 9.104517562892637e-06, "epoch": 0.4843686512103182, "total_flos": 462153914107944960, "step": 120400 }, { "loss": 4.968125, "learning_rate": 9.103706437496705e-06, "epoch": 0.4847709507545128, "total_flos": 462530762678599680, "step": 120500 }, { "loss": 5.030625, "learning_rate": 9.102895312100775e-06, "epoch": 0.4851732502987074, "total_flos": 462924129212620800, "step": 120600 }, { "loss": 4.985625, "learning_rate": 9.102084186704845e-06, "epoch": 0.485575549842902, "total_flos": 463315684613038080, "step": 120700 }, { "loss": 5.0175, "learning_rate": 9.101273061308914e-06, "epoch": 0.48597784938709665, "total_flos": 463717081745326080, "step": 120800 }, { "loss": 4.980625, "learning_rate": 9.100461935912984e-06, "epoch": 0.4863801489312913, "total_flos": 464097812834058240, "step": 120900 }, { "loss": 5.015, "learning_rate": 9.099650810517052e-06, "epoch": 0.48678244847548585, "total_flos": 464479993891921920, "step": 121000 }, { "loss": 4.960625, "learning_rate": 9.098839685121122e-06, "epoch": 0.4871847480196805, "total_flos": 464855928928911360, "step": 121100 }, { "loss": 5.015625, "learning_rate": 9.098028559725192e-06, "epoch": 0.4875870475638751, "total_flos": 465252636234301440, "step": 121200 }, { "loss": 5.013125, "learning_rate": 9.09721743432926e-06, "epoch": 0.48798934710806974, "total_flos": 465641509457387520, "step": 121300 }, { "loss": 5.019375, "learning_rate": 9.09640630893333e-06, "epoch": 0.48839164665226437, "total_flos": 466029224829665280, "step": 121400 }, { "loss": 4.95, "learning_rate": 9.0955951835374e-06, "epoch": 0.48879394619645894, "total_flos": 466413875615170560, "step": 121500 }, { "loss": 4.98, "learning_rate": 9.094784058141469e-06, "epoch": 0.48919624574065357, "total_flos": 466798170547445760, "step": 121600 }, { "loss": 5.086875, "learning_rate": 9.093972932745539e-06, "epoch": 0.4895985452848482, "total_flos": 467172921177415680, "step": 121700 }, { "loss": 4.99, "learning_rate": 9.093161807349607e-06, "epoch": 0.4900008448290428, "total_flos": 467546434287943680, "step": 121800 }, { "loss": 4.96375, "learning_rate": 9.092350681953677e-06, "epoch": 0.4904031443732374, "total_flos": 467903036403179520, "step": 121900 }, { "loss": 4.989375, "learning_rate": 9.091539556557748e-06, "epoch": 0.490805443917432, "total_flos": 468306212801617920, "step": 122000 }, { "loss": 4.98875, "learning_rate": 9.090728431161816e-06, "epoch": 0.49120774346162666, "total_flos": 468682445268172800, "step": 122100 }, { "loss": 4.918125, "learning_rate": 9.089917305765886e-06, "epoch": 0.4916100430058213, "total_flos": 469073124313620480, "step": 122200 }, { "loss": 4.976875, "learning_rate": 9.089106180369956e-06, "epoch": 0.4920123425500159, "total_flos": 469465009011056640, "step": 122300 }, { "loss": 4.945625, "learning_rate": 9.088295054974024e-06, "epoch": 0.4924146420942105, "total_flos": 469836578206924800, "step": 122400 }, { "loss": 4.96625, "learning_rate": 9.087483929578094e-06, "epoch": 0.4928169416384051, "total_flos": 470221266171125760, "step": 122500 }, { "loss": 4.935, "learning_rate": 9.086672804182163e-06, "epoch": 0.49321924118259974, "total_flos": 470622174669127680, "step": 122600 }, { "loss": 4.970625, "learning_rate": 9.085861678786233e-06, "epoch": 0.49362154072679437, "total_flos": 470992607259156480, "step": 122700 }, { "loss": 5.0275, "learning_rate": 9.085050553390303e-06, "epoch": 0.49402384027098895, "total_flos": 471384884988518400, "step": 122800 }, { "loss": 4.924375, "learning_rate": 9.084239427994371e-06, "epoch": 0.4944261398151836, "total_flos": 471766731438120960, "step": 122900 }, { "loss": 4.984375, "learning_rate": 9.083428302598441e-06, "epoch": 0.4948284393593782, "total_flos": 472158563023134720, "step": 123000 }, { "loss": 4.95125, "learning_rate": 9.082617177202511e-06, "epoch": 0.49523073890357283, "total_flos": 472542305586216960, "step": 123100 }, { "loss": 4.975625, "learning_rate": 9.08180605180658e-06, "epoch": 0.49563303844776746, "total_flos": 472938014378065920, "step": 123200 }, { "loss": 4.9025, "learning_rate": 9.08099492641065e-06, "epoch": 0.49603533799196203, "total_flos": 473316153580584960, "step": 123300 }, { "loss": 5.00375, "learning_rate": 9.080183801014718e-06, "epoch": 0.49643763753615666, "total_flos": 473696948404224000, "step": 123400 }, { "loss": 4.9875, "learning_rate": 9.079372675618788e-06, "epoch": 0.4968399370803513, "total_flos": 474081944420474880, "step": 123500 }, { "loss": 4.94625, "learning_rate": 9.078561550222858e-06, "epoch": 0.4972422366245459, "total_flos": 474475316265738240, "step": 123600 }, { "loss": 4.915, "learning_rate": 9.077750424826926e-06, "epoch": 0.49764453616874055, "total_flos": 474849541082726400, "step": 123700 }, { "loss": 4.930625, "learning_rate": 9.076939299430996e-06, "epoch": 0.4980468357129351, "total_flos": 475208517323243520, "step": 123800 }, { "loss": 4.925625, "learning_rate": 9.076128174035066e-06, "epoch": 0.49844913525712975, "total_flos": 475582056989982720, "step": 123900 }, { "loss": 4.91875, "learning_rate": 9.075317048639135e-06, "epoch": 0.4988514348013244, "total_flos": 475974982690897920, "step": 124000 }, { "loss": 4.931875, "learning_rate": 9.074505923243205e-06, "epoch": 0.499253734345519, "total_flos": 476371525347778560, "step": 124100 }, { "loss": 4.99625, "learning_rate": 9.073694797847275e-06, "epoch": 0.4996560338897136, "total_flos": 476745872323338240, "step": 124200 }, { "loss": 4.95, "learning_rate": 9.072883672451343e-06, "epoch": 0.5000583334339083, "total_flos": 477139159188725760, "step": 124300 }, { "loss": 4.92375, "learning_rate": 9.072072547055413e-06, "epoch": 0.5004606329781028, "total_flos": 477538978882068480, "step": 124400 }, { "loss": 4.945, "learning_rate": 9.071261421659482e-06, "epoch": 0.5008629325222974, "total_flos": 477923735892418560, "step": 124500 }, { "loss": 4.949375, "learning_rate": 9.070450296263552e-06, "epoch": 0.5012652320664921, "total_flos": 478323082885201920, "step": 124600 }, { "loss": 4.963125, "learning_rate": 9.069639170867622e-06, "epoch": 0.5016675316106867, "total_flos": 478714266498662400, "step": 124700 }, { "loss": 4.955, "learning_rate": 9.06882804547169e-06, "epoch": 0.5020698311548814, "total_flos": 479096282908016640, "step": 124800 }, { "loss": 4.979375, "learning_rate": 9.06801692007576e-06, "epoch": 0.5024721306990759, "total_flos": 479481762247311360, "step": 124900 }, { "loss": 4.988125, "learning_rate": 9.06720579467983e-06, "epoch": 0.5028744302432705, "total_flos": 479873864705679360, "step": 125000 }, { "loss": 4.940625, "learning_rate": 9.066394669283899e-06, "epoch": 0.5032767297874652, "total_flos": 480254526748262400, "step": 125100 }, { "loss": 4.963125, "learning_rate": 9.065583543887969e-06, "epoch": 0.5036790293316598, "total_flos": 480671735448698880, "step": 125200 }, { "loss": 4.946875, "learning_rate": 9.064772418492037e-06, "epoch": 0.5040813288758543, "total_flos": 481060736141598720, "step": 125300 }, { "loss": 4.905, "learning_rate": 9.063961293096107e-06, "epoch": 0.504483628420049, "total_flos": 481439879168901120, "step": 125400 }, { "loss": 4.925625, "learning_rate": 9.063150167700177e-06, "epoch": 0.5048859279642436, "total_flos": 481816175370362880, "step": 125500 }, { "loss": 4.964375, "learning_rate": 9.062339042304245e-06, "epoch": 0.5052882275084383, "total_flos": 482222788142530560, "step": 125600 }, { "loss": 4.868125, "learning_rate": 9.061527916908315e-06, "epoch": 0.5056905270526328, "total_flos": 482603333337784320, "step": 125700 }, { "loss": 4.89375, "learning_rate": 9.060716791512385e-06, "epoch": 0.5060928265968274, "total_flos": 482988393088942080, "step": 125800 }, { "loss": 4.90375, "learning_rate": 9.059905666116454e-06, "epoch": 0.5064951261410221, "total_flos": 483367307732828160, "step": 125900 }, { "loss": 4.92875, "learning_rate": 9.059094540720524e-06, "epoch": 0.5068974256852167, "total_flos": 483748330939883520, "step": 126000 }, { "loss": 4.916875, "learning_rate": 9.058283415324592e-06, "epoch": 0.5072997252294114, "total_flos": 484124303155568640, "step": 126100 }, { "loss": 4.94, "learning_rate": 9.057472289928662e-06, "epoch": 0.5077020247736059, "total_flos": 484501300441006080, "step": 126200 }, { "loss": 4.965, "learning_rate": 9.056661164532732e-06, "epoch": 0.5081043243178005, "total_flos": 484897041100308480, "step": 126300 }, { "loss": 4.91, "learning_rate": 9.0558500391368e-06, "epoch": 0.5085066238619952, "total_flos": 485278398915624960, "step": 126400 }, { "loss": 4.876875, "learning_rate": 9.05503891374087e-06, "epoch": 0.5089089234061898, "total_flos": 485668429989519360, "step": 126500 }, { "loss": 4.90375, "learning_rate": 9.05422778834494e-06, "epoch": 0.5093112229503844, "total_flos": 486063751060684800, "step": 126600 }, { "loss": 4.9625, "learning_rate": 9.053416662949009e-06, "epoch": 0.509713522494579, "total_flos": 486456628960419840, "step": 126700 }, { "loss": 4.894375, "learning_rate": 9.052605537553079e-06, "epoch": 0.5101158220387736, "total_flos": 486838406363873280, "step": 126800 }, { "loss": 4.92625, "learning_rate": 9.051794412157147e-06, "epoch": 0.5105181215829683, "total_flos": 487241832390696960, "step": 126900 }, { "loss": 4.933125, "learning_rate": 9.050983286761217e-06, "epoch": 0.5109204211271628, "total_flos": 487647319179509760, "step": 127000 }, { "loss": 4.891875, "learning_rate": 9.050172161365288e-06, "epoch": 0.5113227206713575, "total_flos": 488030222566318080, "step": 127100 }, { "loss": 4.86, "learning_rate": 9.049361035969356e-06, "epoch": 0.5117250202155521, "total_flos": 488399778801377280, "step": 127200 }, { "loss": 4.88375, "learning_rate": 9.048549910573426e-06, "epoch": 0.5121273197597467, "total_flos": 488778688134021120, "step": 127300 }, { "loss": 4.89125, "learning_rate": 9.047738785177496e-06, "epoch": 0.5125296193039414, "total_flos": 489164220585738240, "step": 127400 }, { "loss": 4.896875, "learning_rate": 9.046927659781564e-06, "epoch": 0.5129319188481359, "total_flos": 489557656165908480, "step": 127500 }, { "loss": 4.82375, "learning_rate": 9.046116534385634e-06, "epoch": 0.5133342183923305, "total_flos": 489939353900728320, "step": 127600 }, { "loss": 4.873125, "learning_rate": 9.045305408989703e-06, "epoch": 0.5137365179365252, "total_flos": 490340071194009600, "step": 127700 }, { "loss": 4.8725, "learning_rate": 9.044494283593773e-06, "epoch": 0.5141388174807198, "total_flos": 490727701586411520, "step": 127800 }, { "loss": 4.89125, "learning_rate": 9.043683158197843e-06, "epoch": 0.5145411170249145, "total_flos": 491102064495697920, "step": 127900 }, { "loss": 4.844375, "learning_rate": 9.042872032801911e-06, "epoch": 0.514943416569109, "total_flos": 491478743106600960, "step": 128000 }, { "loss": 4.863125, "learning_rate": 9.042060907405981e-06, "epoch": 0.5153457161133036, "total_flos": 491844135327744000, "step": 128100 }, { "loss": 4.855625, "learning_rate": 9.041249782010051e-06, "epoch": 0.5157480156574983, "total_flos": 492238516309032960, "step": 128200 }, { "loss": 4.889375, "learning_rate": 9.04043865661412e-06, "epoch": 0.5161503152016929, "total_flos": 492645644271697920, "step": 128300 }, { "loss": 4.87875, "learning_rate": 9.03962753121819e-06, "epoch": 0.5165526147458875, "total_flos": 493033279975342080, "step": 128400 }, { "loss": 4.820625, "learning_rate": 9.038816405822258e-06, "epoch": 0.5169549142900821, "total_flos": 493406782463385600, "step": 128500 }, { "loss": 4.845, "learning_rate": 9.038005280426328e-06, "epoch": 0.5173572138342767, "total_flos": 493784294939320320, "step": 128600 }, { "loss": 4.8575, "learning_rate": 9.037194155030398e-06, "epoch": 0.5177595133784714, "total_flos": 494155805711523840, "step": 128700 }, { "loss": 4.931875, "learning_rate": 9.036383029634466e-06, "epoch": 0.5181618129226659, "total_flos": 494544317770137600, "step": 128800 }, { "loss": 4.869375, "learning_rate": 9.035571904238536e-06, "epoch": 0.5185641124668606, "total_flos": 494927794771107840, "step": 128900 }, { "loss": 4.8475, "learning_rate": 9.034760778842606e-06, "epoch": 0.5189664120110552, "total_flos": 495324124978298880, "step": 129000 }, { "loss": 4.88625, "learning_rate": 9.033949653446675e-06, "epoch": 0.5193687115552498, "total_flos": 495698025809510400, "step": 129100 }, { "loss": 4.88375, "learning_rate": 9.033138528050745e-06, "epoch": 0.5197710110994445, "total_flos": 496092242142289920, "step": 129200 }, { "loss": 4.879375, "learning_rate": 9.032327402654815e-06, "epoch": 0.520173310643639, "total_flos": 496488827289108480, "step": 129300 }, { "loss": 4.840625, "learning_rate": 9.031516277258883e-06, "epoch": 0.5205756101878336, "total_flos": 496857586837831680, "step": 129400 }, { "loss": 4.84375, "learning_rate": 9.030705151862953e-06, "epoch": 0.5209779097320283, "total_flos": 497224944218603520, "step": 129500 }, { "loss": 4.858125, "learning_rate": 9.029894026467022e-06, "epoch": 0.5213802092762229, "total_flos": 497616117209579520, "step": 129600 }, { "loss": 4.8625, "learning_rate": 9.029082901071092e-06, "epoch": 0.5217825088204175, "total_flos": 497999243668561920, "step": 129700 }, { "loss": 4.85875, "learning_rate": 9.028271775675162e-06, "epoch": 0.5221848083646121, "total_flos": 498382476352389120, "step": 129800 }, { "loss": 4.808125, "learning_rate": 9.02746065027923e-06, "epoch": 0.5225871079088067, "total_flos": 498754013680803840, "step": 129900 }, { "loss": 4.905, "learning_rate": 9.0266495248833e-06, "epoch": 0.5229894074530014, "total_flos": 499137331344506880, "step": 130000 }, { "loss": 4.885625, "learning_rate": 9.02583839948737e-06, "epoch": 0.523391706997196, "total_flos": 499526794115481600, "step": 130100 }, { "loss": 4.91875, "learning_rate": 9.025027274091439e-06, "epoch": 0.5237940065413906, "total_flos": 499925355044413440, "step": 130200 }, { "loss": 4.86625, "learning_rate": 9.024216148695509e-06, "epoch": 0.5241963060855852, "total_flos": 500316528035389440, "step": 130300 }, { "loss": 4.845625, "learning_rate": 9.023405023299577e-06, "epoch": 0.5245986056297798, "total_flos": 500693801505423360, "step": 130400 }, { "loss": 4.844375, "learning_rate": 9.022593897903647e-06, "epoch": 0.5250009051739745, "total_flos": 501079965994967040, "step": 130500 }, { "loss": 4.788125, "learning_rate": 9.021782772507717e-06, "epoch": 0.525403204718169, "total_flos": 501462200165253120, "step": 130600 }, { "loss": 4.8275, "learning_rate": 9.020971647111785e-06, "epoch": 0.5258055042623637, "total_flos": 501848274363678720, "step": 130700 }, { "loss": 4.865, "learning_rate": 9.020160521715855e-06, "epoch": 0.5262078038065583, "total_flos": 502238103610368000, "step": 130800 }, { "loss": 4.83625, "learning_rate": 9.019349396319925e-06, "epoch": 0.5266101033507529, "total_flos": 502623646684569600, "step": 130900 }, { "loss": 4.81125, "learning_rate": 9.018538270923994e-06, "epoch": 0.5270124028949476, "total_flos": 503021602131886080, "step": 131000 }, { "loss": 4.83625, "learning_rate": 9.017727145528064e-06, "epoch": 0.5274147024391421, "total_flos": 503395933173719040, "step": 131100 }, { "loss": 4.88125, "learning_rate": 9.016916020132132e-06, "epoch": 0.5278170019833367, "total_flos": 503781242553262080, "step": 131200 }, { "loss": 4.843125, "learning_rate": 9.016104894736202e-06, "epoch": 0.5282193015275314, "total_flos": 504175315482501120, "step": 131300 }, { "loss": 4.856875, "learning_rate": 9.015293769340272e-06, "epoch": 0.528621601071726, "total_flos": 504556800767631360, "step": 131400 }, { "loss": 4.845625, "learning_rate": 9.01448264394434e-06, "epoch": 0.5290239006159206, "total_flos": 504949407794012160, "step": 131500 }, { "loss": 4.79625, "learning_rate": 9.01367151854841e-06, "epoch": 0.5294262001601152, "total_flos": 505315968488448000, "step": 131600 }, { "loss": 4.8725, "learning_rate": 9.01286039315248e-06, "epoch": 0.5298284997043098, "total_flos": 505692886105251840, "step": 131700 }, { "loss": 4.8775, "learning_rate": 9.012049267756549e-06, "epoch": 0.5302307992485045, "total_flos": 506085668402626560, "step": 131800 }, { "loss": 4.841875, "learning_rate": 9.011238142360619e-06, "epoch": 0.530633098792699, "total_flos": 506470420101734400, "step": 131900 }, { "loss": 4.811875, "learning_rate": 9.010427016964687e-06, "epoch": 0.5310353983368937, "total_flos": 506857227251589120, "step": 132000 }, { "loss": 4.91, "learning_rate": 9.009615891568757e-06, "epoch": 0.5314376978810883, "total_flos": 507243944110325760, "step": 132100 }, { "loss": 4.84125, "learning_rate": 9.008804766172828e-06, "epoch": 0.5318399974252829, "total_flos": 507626980278190080, "step": 132200 }, { "loss": 4.81, "learning_rate": 9.007993640776896e-06, "epoch": 0.5322422969694776, "total_flos": 508017027285811200, "step": 132300 }, { "loss": 4.800625, "learning_rate": 9.007182515380966e-06, "epoch": 0.5326445965136721, "total_flos": 508405650880512000, "step": 132400 }, { "loss": 4.825, "learning_rate": 9.006371389985036e-06, "epoch": 0.5330468960578668, "total_flos": 508798900567203840, "step": 132500 }, { "loss": 4.806875, "learning_rate": 9.005560264589104e-06, "epoch": 0.5334491956020614, "total_flos": 509199315119677440, "step": 132600 }, { "loss": 4.830625, "learning_rate": 9.004749139193174e-06, "epoch": 0.533851495146256, "total_flos": 509593982908047360, "step": 132700 }, { "loss": 4.8425, "learning_rate": 9.003938013797243e-06, "epoch": 0.5342537946904506, "total_flos": 509985479884800000, "step": 132800 }, { "loss": 4.776875, "learning_rate": 9.003126888401313e-06, "epoch": 0.5346560942346452, "total_flos": 510378235625963520, "step": 132900 }, { "loss": 4.80375, "learning_rate": 9.002315763005383e-06, "epoch": 0.5350583937788399, "total_flos": 510761861341716480, "step": 133000 }, { "loss": 4.7725, "learning_rate": 9.001504637609451e-06, "epoch": 0.5354606933230345, "total_flos": 511148020520017920, "step": 133100 }, { "loss": 4.81625, "learning_rate": 9.000693512213521e-06, "epoch": 0.535862992867229, "total_flos": 511520120840110080, "step": 133200 }, { "loss": 4.776875, "learning_rate": 8.999882386817591e-06, "epoch": 0.5362652924114237, "total_flos": 511889326533181440, "step": 133300 }, { "loss": 4.858125, "learning_rate": 8.99907126142166e-06, "epoch": 0.5366675919556183, "total_flos": 512269021929676800, "step": 133400 }, { "loss": 4.881875, "learning_rate": 8.99826013602573e-06, "epoch": 0.5370698914998129, "total_flos": 512651415437230080, "step": 133500 }, { "loss": 4.864375, "learning_rate": 8.9974490106298e-06, "epoch": 0.5374721910440076, "total_flos": 513022660647321600, "step": 133600 }, { "loss": 4.7775, "learning_rate": 8.996637885233868e-06, "epoch": 0.5378744905882021, "total_flos": 513416239631032320, "step": 133700 }, { "loss": 4.819375, "learning_rate": 8.995826759837938e-06, "epoch": 0.5382767901323968, "total_flos": 513804008115732480, "step": 133800 }, { "loss": 4.770625, "learning_rate": 8.995015634442006e-06, "epoch": 0.5386790896765914, "total_flos": 514185599625707520, "step": 133900 }, { "loss": 4.835, "learning_rate": 8.994204509046076e-06, "epoch": 0.539081389220786, "total_flos": 514590141013401600, "step": 134000 }, { "loss": 4.7625, "learning_rate": 8.993393383650146e-06, "epoch": 0.5394836887649807, "total_flos": 514966989584056320, "step": 134100 }, { "loss": 4.798125, "learning_rate": 8.992582258254215e-06, "epoch": 0.5398859883091752, "total_flos": 515344948204339200, "step": 134200 }, { "loss": 4.86, "learning_rate": 8.991771132858285e-06, "epoch": 0.5402882878533699, "total_flos": 515727049593569280, "step": 134300 }, { "loss": 4.81875, "learning_rate": 8.990960007462355e-06, "epoch": 0.5406905873975645, "total_flos": 516115715678208000, "step": 134400 }, { "loss": 4.790625, "learning_rate": 8.990148882066423e-06, "epoch": 0.541092886941759, "total_flos": 516492999770726400, "step": 134500 }, { "loss": 4.80375, "learning_rate": 8.989337756670493e-06, "epoch": 0.5414951864859537, "total_flos": 516873937997905920, "step": 134600 }, { "loss": 4.79125, "learning_rate": 8.988526631274562e-06, "epoch": 0.5418974860301483, "total_flos": 517245363790233600, "step": 134700 }, { "loss": 4.769375, "learning_rate": 8.987715505878632e-06, "epoch": 0.542299785574343, "total_flos": 517641566527610880, "step": 134800 }, { "loss": 4.755625, "learning_rate": 8.986904380482702e-06, "epoch": 0.5427020851185376, "total_flos": 518029074761441280, "step": 134900 }, { "loss": 4.79375, "learning_rate": 8.98609325508677e-06, "epoch": 0.5431043846627321, "total_flos": 518405376274145280, "step": 135000 }, { "loss": 4.82125, "learning_rate": 8.98528212969084e-06, "epoch": 0.5435066842069268, "total_flos": 518782596631756800, "step": 135100 }, { "loss": 4.805, "learning_rate": 8.98447100429491e-06, "epoch": 0.5439089837511214, "total_flos": 519169849925959680, "step": 135200 }, { "loss": 4.779375, "learning_rate": 8.983659878898979e-06, "epoch": 0.544311283295316, "total_flos": 519554596313825280, "step": 135300 }, { "loss": 4.7475, "learning_rate": 8.982848753503049e-06, "epoch": 0.5447135828395107, "total_flos": 519922394527703040, "step": 135400 }, { "loss": 4.745, "learning_rate": 8.982037628107117e-06, "epoch": 0.5451158823837052, "total_flos": 520302727273267200, "step": 135500 }, { "loss": 4.786875, "learning_rate": 8.981226502711187e-06, "epoch": 0.5455181819278999, "total_flos": 520692827393310720, "step": 135600 }, { "loss": 4.819375, "learning_rate": 8.980415377315257e-06, "epoch": 0.5459204814720945, "total_flos": 521075300569497600, "step": 135700 }, { "loss": 4.769375, "learning_rate": 8.979604251919325e-06, "epoch": 0.5463227810162891, "total_flos": 521461778422333440, "step": 135800 }, { "loss": 4.7925, "learning_rate": 8.978793126523395e-06, "epoch": 0.5467250805604837, "total_flos": 521839567082864640, "step": 135900 }, { "loss": 4.74375, "learning_rate": 8.977982001127465e-06, "epoch": 0.5471273801046783, "total_flos": 522232970795581440, "step": 136000 }, { "loss": 4.785625, "learning_rate": 8.977170875731534e-06, "epoch": 0.547529679648873, "total_flos": 522624526195998720, "step": 136100 }, { "loss": 4.8025, "learning_rate": 8.976359750335604e-06, "epoch": 0.5479319791930676, "total_flos": 523014058013122560, "step": 136200 }, { "loss": 4.769375, "learning_rate": 8.975548624939672e-06, "epoch": 0.5483342787372621, "total_flos": 523397067624775680, "step": 136300 }, { "loss": 4.775625, "learning_rate": 8.974737499543744e-06, "epoch": 0.5487365782814568, "total_flos": 523782520407859200, "step": 136400 }, { "loss": 4.770625, "learning_rate": 8.973926374147812e-06, "epoch": 0.5491388778256514, "total_flos": 524172636461629440, "step": 136500 }, { "loss": 4.783125, "learning_rate": 8.97311524875188e-06, "epoch": 0.5495411773698461, "total_flos": 524549644369551360, "step": 136600 }, { "loss": 4.74875, "learning_rate": 8.97230412335595e-06, "epoch": 0.5499434769140407, "total_flos": 524943903192268800, "step": 136700 }, { "loss": 4.751875, "learning_rate": 8.97149299796002e-06, "epoch": 0.5503457764582352, "total_flos": 525332287781068800, "step": 136800 }, { "loss": 4.725625, "learning_rate": 8.970681872564089e-06, "epoch": 0.5507480760024299, "total_flos": 525709417847562240, "step": 136900 }, { "loss": 4.738125, "learning_rate": 8.969870747168159e-06, "epoch": 0.5511503755466245, "total_flos": 526063773307330560, "step": 137000 }, { "loss": 4.77625, "learning_rate": 8.969059621772227e-06, "epoch": 0.5515526750908191, "total_flos": 526447356533145600, "step": 137100 }, { "loss": 4.784375, "learning_rate": 8.9682484963763e-06, "epoch": 0.5519549746350138, "total_flos": 526833207659397120, "step": 137200 }, { "loss": 4.770625, "learning_rate": 8.967437370980368e-06, "epoch": 0.5523572741792083, "total_flos": 527224205379379200, "step": 137300 }, { "loss": 4.765, "learning_rate": 8.966626245584436e-06, "epoch": 0.552759573723403, "total_flos": 527614289565696000, "step": 137400 }, { "loss": 4.715625, "learning_rate": 8.965815120188506e-06, "epoch": 0.5531618732675976, "total_flos": 527990033397964800, "step": 137500 }, { "loss": 4.74, "learning_rate": 8.965003994792576e-06, "epoch": 0.5535641728117922, "total_flos": 528351930821713920, "step": 137600 }, { "loss": 4.765625, "learning_rate": 8.964192869396644e-06, "epoch": 0.5539664723559868, "total_flos": 528721391454412800, "step": 137700 }, { "loss": 4.785625, "learning_rate": 8.963381744000714e-06, "epoch": 0.5543687719001814, "total_flos": 529108156114329600, "step": 137800 }, { "loss": 4.685625, "learning_rate": 8.962570618604783e-06, "epoch": 0.5547710714443761, "total_flos": 529491909299896320, "step": 137900 }, { "loss": 4.7525, "learning_rate": 8.961759493208854e-06, "epoch": 0.5551733709885707, "total_flos": 529887835852677120, "step": 138000 }, { "loss": 4.729375, "learning_rate": 8.960948367812923e-06, "epoch": 0.5555756705327652, "total_flos": 530274935120855040, "step": 138100 }, { "loss": 4.750625, "learning_rate": 8.960137242416991e-06, "epoch": 0.5559779700769599, "total_flos": 530665162710712320, "step": 138200 }, { "loss": 4.783125, "learning_rate": 8.959326117021063e-06, "epoch": 0.5563802696211545, "total_flos": 531065779090391040, "step": 138300 }, { "loss": 4.765, "learning_rate": 8.958514991625131e-06, "epoch": 0.5567825691653492, "total_flos": 531449250780119040, "step": 138400 }, { "loss": 4.756875, "learning_rate": 8.957703866229201e-06, "epoch": 0.5571848687095438, "total_flos": 531848401256939520, "step": 138500 }, { "loss": 4.773125, "learning_rate": 8.95689274083327e-06, "epoch": 0.5575871682537383, "total_flos": 532236727422074880, "step": 138600 }, { "loss": 4.73, "learning_rate": 8.95608161543734e-06, "epoch": 0.557989467797933, "total_flos": 532626487622615040, "step": 138700 }, { "loss": 4.754375, "learning_rate": 8.95527049004141e-06, "epoch": 0.5583917673421276, "total_flos": 533012317503897600, "step": 138800 }, { "loss": 4.7725, "learning_rate": 8.954459364645478e-06, "epoch": 0.5587940668863223, "total_flos": 533401387242946560, "step": 138900 }, { "loss": 4.738125, "learning_rate": 8.953648239249546e-06, "epoch": 0.5591963664305168, "total_flos": 533777741868072960, "step": 139000 }, { "loss": 4.76125, "learning_rate": 8.952837113853618e-06, "epoch": 0.5595986659747114, "total_flos": 534143973265489920, "step": 139100 }, { "loss": 4.74, "learning_rate": 8.952025988457686e-06, "epoch": 0.5600009655189061, "total_flos": 534534253967769600, "step": 139200 }, { "loss": 4.758125, "learning_rate": 8.951214863061757e-06, "epoch": 0.5604032650631007, "total_flos": 534913715669606400, "step": 139300 }, { "loss": 4.73875, "learning_rate": 8.950403737665825e-06, "epoch": 0.5608055646072952, "total_flos": 535298557659832320, "step": 139400 }, { "loss": 4.7275, "learning_rate": 8.949592612269895e-06, "epoch": 0.5612078641514899, "total_flos": 535701224179015680, "step": 139500 }, { "loss": 4.708125, "learning_rate": 8.948781486873965e-06, "epoch": 0.5616101636956845, "total_flos": 536083622997811200, "step": 139600 }, { "loss": 4.74125, "learning_rate": 8.947970361478033e-06, "epoch": 0.5620124632398792, "total_flos": 536496232162467840, "step": 139700 }, { "loss": 4.705, "learning_rate": 8.947159236082102e-06, "epoch": 0.5624147627840738, "total_flos": 536887182081269760, "step": 139800 }, { "loss": 4.696875, "learning_rate": 8.946348110686173e-06, "epoch": 0.5628170623282683, "total_flos": 537264057208135680, "step": 139900 }, { "loss": 4.725625, "learning_rate": 8.945536985290242e-06, "epoch": 0.563219361872463, "total_flos": 537642637243760640, "step": 140000 }, { "loss": 4.733125, "learning_rate": 8.944725859894312e-06, "epoch": 0.5636216614166576, "total_flos": 538029226632683520, "step": 140100 }, { "loss": 4.745, "learning_rate": 8.94391473449838e-06, "epoch": 0.5640239609608523, "total_flos": 538415396433469440, "step": 140200 }, { "loss": 4.713125, "learning_rate": 8.94310360910245e-06, "epoch": 0.5644262605050469, "total_flos": 538788027877785600, "step": 140300 }, { "loss": 4.701875, "learning_rate": 8.94229248370652e-06, "epoch": 0.5648285600492414, "total_flos": 539164717111173120, "step": 140400 }, { "loss": 4.6975, "learning_rate": 8.941481358310589e-06, "epoch": 0.5652308595934361, "total_flos": 539550562926182400, "step": 140500 }, { "loss": 4.658125, "learning_rate": 8.940670232914657e-06, "epoch": 0.5656331591376307, "total_flos": 539935638611066880, "step": 140600 }, { "loss": 4.684375, "learning_rate": 8.939859107518729e-06, "epoch": 0.5660354586818254, "total_flos": 540308195697991680, "step": 140700 }, { "loss": 4.755625, "learning_rate": 8.939047982122797e-06, "epoch": 0.5664377582260199, "total_flos": 540688087610449920, "step": 140800 }, { "loss": 4.748125, "learning_rate": 8.938236856726867e-06, "epoch": 0.5668400577702145, "total_flos": 541069333889679360, "step": 140900 }, { "loss": 4.6975, "learning_rate": 8.937425731330935e-06, "epoch": 0.5672423573144092, "total_flos": 541460135093698560, "step": 141000 }, { "loss": 4.745625, "learning_rate": 8.936614605935005e-06, "epoch": 0.5676446568586038, "total_flos": 541844589363240960, "step": 141100 }, { "loss": 4.628125, "learning_rate": 8.935803480539075e-06, "epoch": 0.5680469564027983, "total_flos": 542234131802849280, "step": 141200 }, { "loss": 4.743125, "learning_rate": 8.934992355143144e-06, "epoch": 0.568449255946993, "total_flos": 542622898801090560, "step": 141300 }, { "loss": 4.679375, "learning_rate": 8.934181229747214e-06, "epoch": 0.5688515554911876, "total_flos": 543015383668899840, "step": 141400 }, { "loss": 4.706875, "learning_rate": 8.933370104351284e-06, "epoch": 0.5692538550353823, "total_flos": 543422049553489920, "step": 141500 }, { "loss": 4.686875, "learning_rate": 8.932558978955352e-06, "epoch": 0.5696561545795769, "total_flos": 543812372745707520, "step": 141600 }, { "loss": 4.7, "learning_rate": 8.931747853559422e-06, "epoch": 0.5700584541237714, "total_flos": 544187351759093760, "step": 141700 }, { "loss": 4.65125, "learning_rate": 8.93093672816349e-06, "epoch": 0.5704607536679661, "total_flos": 544561459728752640, "step": 141800 }, { "loss": 4.706875, "learning_rate": 8.93012560276756e-06, "epoch": 0.5708630532121607, "total_flos": 544944830504878080, "step": 141900 }, { "loss": 4.65125, "learning_rate": 8.92931447737163e-06, "epoch": 0.5712653527563554, "total_flos": 545312920837079040, "step": 142000 }, { "loss": 4.68125, "learning_rate": 8.928503351975699e-06, "epoch": 0.5716676523005499, "total_flos": 545687761758167040, "step": 142100 }, { "loss": 4.66375, "learning_rate": 8.927692226579769e-06, "epoch": 0.5720699518447445, "total_flos": 546064913069629440, "step": 142200 }, { "loss": 4.721875, "learning_rate": 8.92688110118384e-06, "epoch": 0.5724722513889392, "total_flos": 546449770993582080, "step": 142300 }, { "loss": 4.669375, "learning_rate": 8.926069975787908e-06, "epoch": 0.5728745509331338, "total_flos": 546825583872000000, "step": 142400 }, { "loss": 4.67, "learning_rate": 8.925258850391978e-06, "epoch": 0.5732768504773285, "total_flos": 547225217671864320, "step": 142500 }, { "loss": 4.713125, "learning_rate": 8.924447724996046e-06, "epoch": 0.573679150021523, "total_flos": 547599235350405120, "step": 142600 }, { "loss": 4.68125, "learning_rate": 8.923636599600116e-06, "epoch": 0.5740814495657176, "total_flos": 547984927139389440, "step": 142700 }, { "loss": 4.7075, "learning_rate": 8.922825474204186e-06, "epoch": 0.5744837491099123, "total_flos": 548371266899927040, "step": 142800 }, { "loss": 4.649375, "learning_rate": 8.922014348808254e-06, "epoch": 0.5748860486541069, "total_flos": 548759077874565120, "step": 142900 }, { "loss": 4.6375, "learning_rate": 8.921203223412324e-06, "epoch": 0.5752883481983015, "total_flos": 549134120622858240, "step": 143000 }, { "loss": 4.7175, "learning_rate": 8.920392098016394e-06, "epoch": 0.5756906477424961, "total_flos": 549529255800545280, "step": 143100 }, { "loss": 4.7, "learning_rate": 8.919580972620463e-06, "epoch": 0.5760929472866907, "total_flos": 549909790373314560, "step": 143200 }, { "loss": 4.7075, "learning_rate": 8.918769847224533e-06, "epoch": 0.5764952468308854, "total_flos": 550288269495336960, "step": 143300 }, { "loss": 4.739375, "learning_rate": 8.917958721828603e-06, "epoch": 0.57689754637508, "total_flos": 550688322883338240, "step": 143400 }, { "loss": 4.690625, "learning_rate": 8.917147596432671e-06, "epoch": 0.5772998459192745, "total_flos": 551081885933322240, "step": 143500 }, { "loss": 4.67625, "learning_rate": 8.916336471036741e-06, "epoch": 0.5777021454634692, "total_flos": 551471194678272000, "step": 143600 }, { "loss": 4.63125, "learning_rate": 8.91552534564081e-06, "epoch": 0.5781044450076638, "total_flos": 551864502788628480, "step": 143700 }, { "loss": 4.693125, "learning_rate": 8.91471422024488e-06, "epoch": 0.5785067445518585, "total_flos": 552245966828789760, "step": 143800 }, { "loss": 4.651875, "learning_rate": 8.91390309484895e-06, "epoch": 0.578909044096053, "total_flos": 552627776099696640, "step": 143900 }, { "loss": 4.685, "learning_rate": 8.913091969453018e-06, "epoch": 0.5793113436402476, "total_flos": 553003987321282560, "step": 144000 }, { "loss": 4.64875, "learning_rate": 8.912280844057088e-06, "epoch": 0.5797136431844423, "total_flos": 553389100184862720, "step": 144100 }, { "loss": 4.665, "learning_rate": 8.911469718661158e-06, "epoch": 0.5801159427286369, "total_flos": 553774717616455680, "step": 144200 }, { "loss": 4.655625, "learning_rate": 8.910658593265226e-06, "epoch": 0.5805182422728316, "total_flos": 554155002560839680, "step": 144300 }, { "loss": 4.641875, "learning_rate": 8.909847467869297e-06, "epoch": 0.5809205418170261, "total_flos": 554535170657894400, "step": 144400 }, { "loss": 4.63375, "learning_rate": 8.909036342473365e-06, "epoch": 0.5813228413612207, "total_flos": 554904918097674240, "step": 144500 }, { "loss": 4.71375, "learning_rate": 8.908225217077435e-06, "epoch": 0.5817251409054154, "total_flos": 555290137186099200, "step": 144600 }, { "loss": 4.664375, "learning_rate": 8.907414091681505e-06, "epoch": 0.58212744044961, "total_flos": 555687657111552000, "step": 144700 }, { "loss": 4.65375, "learning_rate": 8.906602966285573e-06, "epoch": 0.5825297399938046, "total_flos": 556071484654510080, "step": 144800 }, { "loss": 4.633125, "learning_rate": 8.905791840889643e-06, "epoch": 0.5829320395379992, "total_flos": 556462312414740480, "step": 144900 }, { "loss": 4.645, "learning_rate": 8.904980715493713e-06, "epoch": 0.5833343390821938, "total_flos": 556834157795205120, "step": 145000 }, { "loss": 4.665, "learning_rate": 8.904169590097782e-06, "epoch": 0.5837366386263885, "total_flos": 557214586143129600, "step": 145100 }, { "loss": 4.669375, "learning_rate": 8.903358464701852e-06, "epoch": 0.584138938170583, "total_flos": 557593559210680320, "step": 145200 }, { "loss": 4.575, "learning_rate": 8.90254733930592e-06, "epoch": 0.5845412377147776, "total_flos": 557977737295626240, "step": 145300 }, { "loss": 4.544375, "learning_rate": 8.90173621390999e-06, "epoch": 0.5849435372589723, "total_flos": 558346178169815040, "step": 145400 }, { "loss": 4.614375, "learning_rate": 8.90092508851406e-06, "epoch": 0.5853458368031669, "total_flos": 558726298465689600, "step": 145500 }, { "loss": 4.660625, "learning_rate": 8.900113963118129e-06, "epoch": 0.5857481363473616, "total_flos": 559096407069941760, "step": 145600 }, { "loss": 4.66125, "learning_rate": 8.899302837722199e-06, "epoch": 0.5861504358915561, "total_flos": 559482693718056960, "step": 145700 }, { "loss": 4.625625, "learning_rate": 8.898491712326269e-06, "epoch": 0.5865527354357507, "total_flos": 559860275240140800, "step": 145800 }, { "loss": 4.626875, "learning_rate": 8.897680586930337e-06, "epoch": 0.5869550349799454, "total_flos": 560239837855580160, "step": 145900 }, { "loss": 4.63875, "learning_rate": 8.896869461534407e-06, "epoch": 0.58735733452414, "total_flos": 560626416622018560, "step": 146000 }, { "loss": 4.611875, "learning_rate": 8.896058336138475e-06, "epoch": 0.5877596340683346, "total_flos": 561002888094474240, "step": 146100 }, { "loss": 4.665, "learning_rate": 8.895247210742545e-06, "epoch": 0.5881619336125292, "total_flos": 561402070438748160, "step": 146200 }, { "loss": 4.62125, "learning_rate": 8.894436085346615e-06, "epoch": 0.5885642331567238, "total_flos": 561783693816176640, "step": 146300 }, { "loss": 4.638125, "learning_rate": 8.893624959950684e-06, "epoch": 0.5889665327009185, "total_flos": 562169183777955840, "step": 146400 }, { "loss": 4.620625, "learning_rate": 8.892813834554754e-06, "epoch": 0.589368832245113, "total_flos": 562552411150540800, "step": 146500 }, { "loss": 4.66125, "learning_rate": 8.892002709158824e-06, "epoch": 0.5897711317893077, "total_flos": 562918116734976000, "step": 146600 }, { "loss": 4.631875, "learning_rate": 8.891191583762892e-06, "epoch": 0.5901734313335023, "total_flos": 563311823188500480, "step": 146700 }, { "loss": 4.59, "learning_rate": 8.890380458366962e-06, "epoch": 0.5905757308776969, "total_flos": 563682670055424000, "step": 146800 }, { "loss": 4.63625, "learning_rate": 8.88956933297103e-06, "epoch": 0.5909780304218916, "total_flos": 564076636759818240, "step": 146900 }, { "loss": 4.668125, "learning_rate": 8.8887582075751e-06, "epoch": 0.5913803299660861, "total_flos": 564450102069166080, "step": 147000 }, { "loss": 4.685, "learning_rate": 8.88794708217917e-06, "epoch": 0.5917826295102807, "total_flos": 564846963400581120, "step": 147100 }, { "loss": 4.606875, "learning_rate": 8.887135956783239e-06, "epoch": 0.5921849290544754, "total_flos": 565220603980922880, "step": 147200 }, { "loss": 4.56, "learning_rate": 8.886324831387309e-06, "epoch": 0.59258722859867, "total_flos": 565600187841331200, "step": 147300 }, { "loss": 4.544375, "learning_rate": 8.88551370599138e-06, "epoch": 0.5929895281428647, "total_flos": 565969956526080000, "step": 147400 }, { "loss": 4.640625, "learning_rate": 8.884702580595448e-06, "epoch": 0.5933918276870592, "total_flos": 566370121450168320, "step": 147500 }, { "loss": 4.63125, "learning_rate": 8.883891455199518e-06, "epoch": 0.5937941272312538, "total_flos": 566764475875246080, "step": 147600 }, { "loss": 4.67125, "learning_rate": 8.883080329803588e-06, "epoch": 0.5941964267754485, "total_flos": 567141967106211840, "step": 147700 }, { "loss": 4.6275, "learning_rate": 8.882269204407656e-06, "epoch": 0.5945987263196431, "total_flos": 567545993303408640, "step": 147800 }, { "loss": 4.585625, "learning_rate": 8.881458079011726e-06, "epoch": 0.5950010258638377, "total_flos": 567927781329346560, "step": 147900 }, { "loss": 4.61375, "learning_rate": 8.880646953615794e-06, "epoch": 0.5954033254080323, "total_flos": 568306180782735360, "step": 148000 }, { "loss": 4.600625, "learning_rate": 8.879835828219864e-06, "epoch": 0.5958056249522269, "total_flos": 568692467430850560, "step": 148100 }, { "loss": 4.576875, "learning_rate": 8.879024702823934e-06, "epoch": 0.5962079244964216, "total_flos": 569085111635927040, "step": 148200 }, { "loss": 4.5775, "learning_rate": 8.878213577428003e-06, "epoch": 0.5966102240406161, "total_flos": 569458481342914560, "step": 148300 }, { "loss": 4.63, "learning_rate": 8.877402452032073e-06, "epoch": 0.5970125235848108, "total_flos": 569851210527866880, "step": 148400 }, { "loss": 4.5875, "learning_rate": 8.876591326636143e-06, "epoch": 0.5974148231290054, "total_flos": 570228749560012800, "step": 148500 }, { "loss": 4.654375, "learning_rate": 8.875780201240211e-06, "epoch": 0.5978171226732, "total_flos": 570607589846507520, "step": 148600 }, { "loss": 4.603125, "learning_rate": 8.874969075844281e-06, "epoch": 0.5982194222173947, "total_flos": 570993802137231360, "step": 148700 }, { "loss": 4.635625, "learning_rate": 8.87415795044835e-06, "epoch": 0.5986217217615892, "total_flos": 571392405556101120, "step": 148800 }, { "loss": 4.66125, "learning_rate": 8.87334682505242e-06, "epoch": 0.5990240213057839, "total_flos": 571773349094522880, "step": 148900 }, { "loss": 4.579375, "learning_rate": 8.87253569965649e-06, "epoch": 0.5994263208499785, "total_flos": 572158839056302080, "step": 149000 }, { "loss": 4.595, "learning_rate": 8.871724574260558e-06, "epoch": 0.5998286203941731, "total_flos": 572541742443110400, "step": 149100 }, { "loss": 4.636875, "learning_rate": 8.870913448864628e-06, "epoch": 0.6002309199383677, "total_flos": 572934800925081600, "step": 149200 }, { "loss": 4.603125, "learning_rate": 8.870102323468698e-06, "epoch": 0.6006332194825623, "total_flos": 573318097343815680, "step": 149300 }, { "loss": 4.553125, "learning_rate": 8.869291198072766e-06, "epoch": 0.6010355190267569, "total_flos": 573693878354780160, "step": 149400 }, { "loss": 4.55125, "learning_rate": 8.868480072676837e-06, "epoch": 0.6014378185709516, "total_flos": 574058824431575040, "step": 149500 }, { "loss": 4.5725, "learning_rate": 8.867668947280905e-06, "epoch": 0.6018401181151461, "total_flos": 574439921996021760, "step": 149600 }, { "loss": 4.560625, "learning_rate": 8.866857821884975e-06, "epoch": 0.6022424176593408, "total_flos": 574818167423385600, "step": 149700 }, { "loss": 4.6, "learning_rate": 8.866046696489045e-06, "epoch": 0.6026447172035354, "total_flos": 575201899363983360, "step": 149800 }, { "loss": 4.559375, "learning_rate": 8.865235571093113e-06, "epoch": 0.60304701674773, "total_flos": 575579645534576640, "step": 149900 }, { "loss": 4.55375, "learning_rate": 8.864424445697183e-06, "epoch": 0.6034493162919247, "total_flos": 575959388732252160, "step": 150000 }, { "loss": 4.565625, "learning_rate": 8.863613320301253e-06, "epoch": 0.6038516158361192, "total_flos": 576351230939750400, "step": 150100 }, { "loss": 4.6175, "learning_rate": 8.862802194905322e-06, "epoch": 0.6042539153803139, "total_flos": 576736434094448640, "step": 150200 }, { "loss": 4.56875, "learning_rate": 8.861991069509392e-06, "epoch": 0.6046562149245085, "total_flos": 577115040686284800, "step": 150300 }, { "loss": 4.554375, "learning_rate": 8.86117994411346e-06, "epoch": 0.6050585144687031, "total_flos": 577495123803463680, "step": 150400 }, { "loss": 4.67375, "learning_rate": 8.86036881871753e-06, "epoch": 0.6054608140128978, "total_flos": 577881936264560640, "step": 150500 }, { "loss": 4.62125, "learning_rate": 8.8595576933216e-06, "epoch": 0.6058631135570923, "total_flos": 578276731522744320, "step": 150600 }, { "loss": 4.619375, "learning_rate": 8.858746567925669e-06, "epoch": 0.606265413101287, "total_flos": 578660713091727360, "step": 150700 }, { "loss": 4.556875, "learning_rate": 8.857935442529739e-06, "epoch": 0.6066677126454816, "total_flos": 579056379393638400, "step": 150800 }, { "loss": 4.5525, "learning_rate": 8.857124317133809e-06, "epoch": 0.6070700121896762, "total_flos": 579455168705986560, "step": 150900 }, { "loss": 4.56625, "learning_rate": 8.856313191737877e-06, "epoch": 0.6074723117338708, "total_flos": 579840945474846720, "step": 151000 }, { "loss": 4.550625, "learning_rate": 8.855502066341947e-06, "epoch": 0.6078746112780654, "total_flos": 580214671035064320, "step": 151100 }, { "loss": 4.609375, "learning_rate": 8.854690940946015e-06, "epoch": 0.60827691082226, "total_flos": 580601488807403520, "step": 151200 }, { "loss": 4.629375, "learning_rate": 8.853879815550085e-06, "epoch": 0.6086792103664547, "total_flos": 580998068642979840, "step": 151300 }, { "loss": 4.570625, "learning_rate": 8.853068690154155e-06, "epoch": 0.6090815099106492, "total_flos": 581384658031902720, "step": 151400 }, { "loss": 4.62125, "learning_rate": 8.852257564758224e-06, "epoch": 0.6094838094548439, "total_flos": 581771327089459200, "step": 151500 }, { "loss": 4.65375, "learning_rate": 8.851446439362294e-06, "epoch": 0.6098861089990385, "total_flos": 582169988931993600, "step": 151600 }, { "loss": 4.611875, "learning_rate": 8.850635313966364e-06, "epoch": 0.6102884085432331, "total_flos": 582546471026933760, "step": 151700 }, { "loss": 4.575625, "learning_rate": 8.849824188570432e-06, "epoch": 0.6106907080874278, "total_flos": 582935014953000960, "step": 151800 }, { "loss": 4.55125, "learning_rate": 8.849013063174502e-06, "epoch": 0.6110930076316223, "total_flos": 583312931083345920, "step": 151900 }, { "loss": 4.57, "learning_rate": 8.84820193777857e-06, "epoch": 0.611495307175817, "total_flos": 583697024188416000, "step": 152000 }, { "loss": 4.56625, "learning_rate": 8.84739081238264e-06, "epoch": 0.6118976067200116, "total_flos": 584088032530882560, "step": 152100 }, { "loss": 4.6025, "learning_rate": 8.84657968698671e-06, "epoch": 0.6122999062642062, "total_flos": 584498267570257920, "step": 152200 }, { "loss": 4.58125, "learning_rate": 8.845768561590779e-06, "epoch": 0.6127022058084008, "total_flos": 584873140358799360, "step": 152300 }, { "loss": 4.53875, "learning_rate": 8.844957436194849e-06, "epoch": 0.6131045053525954, "total_flos": 585253324389580800, "step": 152400 }, { "loss": 4.555, "learning_rate": 8.84414631079892e-06, "epoch": 0.6135068048967901, "total_flos": 585637481229557760, "step": 152500 }, { "loss": 4.536875, "learning_rate": 8.843335185402988e-06, "epoch": 0.6139091044409847, "total_flos": 586017373142016000, "step": 152600 }, { "loss": 4.52375, "learning_rate": 8.842524060007058e-06, "epoch": 0.6143114039851792, "total_flos": 586407207699947520, "step": 152700 }, { "loss": 4.566875, "learning_rate": 8.841712934611128e-06, "epoch": 0.6147137035293739, "total_flos": 586798051393904640, "step": 152800 }, { "loss": 4.55125, "learning_rate": 8.840901809215196e-06, "epoch": 0.6151160030735685, "total_flos": 587171399855923200, "step": 152900 }, { "loss": 4.59, "learning_rate": 8.840090683819266e-06, "epoch": 0.6155183026177631, "total_flos": 587540281563217920, "step": 153000 }, { "loss": 4.55, "learning_rate": 8.839279558423334e-06, "epoch": 0.6159206021619578, "total_flos": 587925128864686080, "step": 153100 }, { "loss": 4.600625, "learning_rate": 8.838468433027404e-06, "epoch": 0.6163229017061523, "total_flos": 588297940891238400, "step": 153200 }, { "loss": 4.551875, "learning_rate": 8.837657307631474e-06, "epoch": 0.616725201250347, "total_flos": 588678884429660160, "step": 153300 }, { "loss": 4.575, "learning_rate": 8.836846182235543e-06, "epoch": 0.6171275007945416, "total_flos": 589072585571942400, "step": 153400 }, { "loss": 4.583125, "learning_rate": 8.836035056839613e-06, "epoch": 0.6175298003387362, "total_flos": 589466955930746880, "step": 153500 }, { "loss": 4.550625, "learning_rate": 8.835223931443683e-06, "epoch": 0.6179320998829309, "total_flos": 589856965759672320, "step": 153600 }, { "loss": 4.564375, "learning_rate": 8.834412806047751e-06, "epoch": 0.6183343994271254, "total_flos": 590251075867607040, "step": 153700 }, { "loss": 4.568125, "learning_rate": 8.833601680651821e-06, "epoch": 0.6187366989713201, "total_flos": 590641356569886720, "step": 153800 }, { "loss": 4.540625, "learning_rate": 8.83279055525589e-06, "epoch": 0.6191389985155147, "total_flos": 591020287147499520, "step": 153900 }, { "loss": 4.55125, "learning_rate": 8.83197942985996e-06, "epoch": 0.6195412980597093, "total_flos": 591401910524928000, "step": 154000 }, { "loss": 4.594375, "learning_rate": 8.83116830446403e-06, "epoch": 0.6199435976039039, "total_flos": 591774016156262400, "step": 154100 }, { "loss": 4.565, "learning_rate": 8.830357179068098e-06, "epoch": 0.6203458971480985, "total_flos": 592153812466360320, "step": 154200 }, { "loss": 4.57, "learning_rate": 8.829546053672168e-06, "epoch": 0.6207481966922932, "total_flos": 592536471536025600, "step": 154300 }, { "loss": 4.54875, "learning_rate": 8.828734928276238e-06, "epoch": 0.6211504962364878, "total_flos": 592918270184448000, "step": 154400 }, { "loss": 4.533125, "learning_rate": 8.827923802880306e-06, "epoch": 0.6215527957806823, "total_flos": 593286328649195520, "step": 154500 }, { "loss": 4.46625, "learning_rate": 8.827112677484377e-06, "epoch": 0.621955095324877, "total_flos": 593670251794513920, "step": 154600 }, { "loss": 4.539375, "learning_rate": 8.826301552088445e-06, "epoch": 0.6223573948690716, "total_flos": 594037651665223680, "step": 154700 }, { "loss": 4.515625, "learning_rate": 8.825490426692515e-06, "epoch": 0.6227596944132663, "total_flos": 594404791285063680, "step": 154800 }, { "loss": 4.458125, "learning_rate": 8.824679301296585e-06, "epoch": 0.6231619939574609, "total_flos": 594798890770513920, "step": 154900 }, { "loss": 4.53125, "learning_rate": 8.823868175900653e-06, "epoch": 0.6235642935016554, "total_flos": 595186027217387520, "step": 155000 }, { "loss": 4.514375, "learning_rate": 8.823057050504723e-06, "epoch": 0.6239665930458501, "total_flos": 595585151137996800, "step": 155100 }, { "loss": 4.53375, "learning_rate": 8.822245925108793e-06, "epoch": 0.6243688925900447, "total_flos": 595965356413747200, "step": 155200 }, { "loss": 4.559375, "learning_rate": 8.821434799712862e-06, "epoch": 0.6247711921342393, "total_flos": 596367916708085760, "step": 155300 }, { "loss": 4.50625, "learning_rate": 8.820623674316932e-06, "epoch": 0.625173491678434, "total_flos": 596764130067947520, "step": 155400 }, { "loss": 4.536875, "learning_rate": 8.819812548921e-06, "epoch": 0.6255757912226285, "total_flos": 597146741336432640, "step": 155500 }, { "loss": 4.4925, "learning_rate": 8.81900142352507e-06, "epoch": 0.6259780907668232, "total_flos": 597523435881062400, "step": 155600 }, { "loss": 4.50625, "learning_rate": 8.81819029812914e-06, "epoch": 0.6263803903110178, "total_flos": 597896938369105920, "step": 155700 }, { "loss": 4.498125, "learning_rate": 8.817379172733209e-06, "epoch": 0.6267826898552123, "total_flos": 598277733192744960, "step": 155800 }, { "loss": 4.45, "learning_rate": 8.816568047337279e-06, "epoch": 0.627184989399407, "total_flos": 598656143268618240, "step": 155900 }, { "loss": 4.511875, "learning_rate": 8.815756921941349e-06, "epoch": 0.6275872889436016, "total_flos": 599033528274739200, "step": 156000 }, { "loss": 4.533125, "learning_rate": 8.814945796545417e-06, "epoch": 0.6279895884877963, "total_flos": 599419809611612160, "step": 156100 }, { "loss": 4.55625, "learning_rate": 8.814134671149487e-06, "epoch": 0.6283918880319909, "total_flos": 599800004264878080, "step": 156200 }, { "loss": 4.496875, "learning_rate": 8.813323545753555e-06, "epoch": 0.6287941875761854, "total_flos": 600174606180065280, "step": 156300 }, { "loss": 4.496875, "learning_rate": 8.812512420357625e-06, "epoch": 0.6291964871203801, "total_flos": 600556378272276480, "step": 156400 }, { "loss": 4.555, "learning_rate": 8.811701294961695e-06, "epoch": 0.6295987866645747, "total_flos": 600943764347535360, "step": 156500 }, { "loss": 4.49375, "learning_rate": 8.810890169565764e-06, "epoch": 0.6300010862087694, "total_flos": 601332079890186240, "step": 156600 }, { "loss": 4.544375, "learning_rate": 8.810079044169834e-06, "epoch": 0.630403385752964, "total_flos": 601722047229173760, "step": 156700 }, { "loss": 4.50375, "learning_rate": 8.809267918773904e-06, "epoch": 0.6308056852971585, "total_flos": 602108594128158720, "step": 156800 }, { "loss": 4.5075, "learning_rate": 8.808456793377972e-06, "epoch": 0.6312079848413532, "total_flos": 602491184151674880, "step": 156900 }, { "loss": 4.52, "learning_rate": 8.807645667982042e-06, "epoch": 0.6316102843855478, "total_flos": 602877056522895360, "step": 157000 }, { "loss": 4.486875, "learning_rate": 8.806834542586112e-06, "epoch": 0.6320125839297424, "total_flos": 603255009831936000, "step": 157100 }, { "loss": 4.485, "learning_rate": 8.80602341719018e-06, "epoch": 0.632414883473937, "total_flos": 603629123112837120, "step": 157200 }, { "loss": 4.50875, "learning_rate": 8.80521229179425e-06, "epoch": 0.6328171830181316, "total_flos": 604005445870510080, "step": 157300 }, { "loss": 4.55, "learning_rate": 8.804401166398319e-06, "epoch": 0.6332194825623263, "total_flos": 604402817081180160, "step": 157400 }, { "loss": 4.468125, "learning_rate": 8.80359004100239e-06, "epoch": 0.6336217821065209, "total_flos": 604785986030100480, "step": 157500 }, { "loss": 4.476875, "learning_rate": 8.80277891560646e-06, "epoch": 0.6340240816507154, "total_flos": 605185896014561280, "step": 157600 }, { "loss": 4.4925, "learning_rate": 8.801967790210528e-06, "epoch": 0.6344263811949101, "total_flos": 605565968509255680, "step": 157700 }, { "loss": 4.49375, "learning_rate": 8.801156664814598e-06, "epoch": 0.6348286807391047, "total_flos": 605950656473456640, "step": 157800 }, { "loss": 4.495, "learning_rate": 8.800345539418668e-06, "epoch": 0.6352309802832994, "total_flos": 606332194871009280, "step": 157900 }, { "loss": 4.501875, "learning_rate": 8.799534414022736e-06, "epoch": 0.635633279827494, "total_flos": 606704215522467840, "step": 158000 }, { "loss": 4.539375, "learning_rate": 8.798723288626806e-06, "epoch": 0.6360355793716885, "total_flos": 607095420380897280, "step": 158100 }, { "loss": 4.51625, "learning_rate": 8.797912163230874e-06, "epoch": 0.6364378789158832, "total_flos": 607498065655111680, "step": 158200 }, { "loss": 4.49375, "learning_rate": 8.797101037834946e-06, "epoch": 0.6368401784600778, "total_flos": 607901566039326720, "step": 158300 }, { "loss": 4.47625, "learning_rate": 8.796289912439014e-06, "epoch": 0.6372424780042725, "total_flos": 608271568418734080, "step": 158400 }, { "loss": 4.5175, "learning_rate": 8.795478787043083e-06, "epoch": 0.637644777548467, "total_flos": 608647147602493440, "step": 158500 }, { "loss": 4.54375, "learning_rate": 8.794667661647153e-06, "epoch": 0.6380470770926616, "total_flos": 609035765885952000, "step": 158600 }, { "loss": 4.453125, "learning_rate": 8.793856536251223e-06, "epoch": 0.6384493766368563, "total_flos": 609423635284254720, "step": 158700 }, { "loss": 4.556875, "learning_rate": 8.793045410855293e-06, "epoch": 0.6388516761810509, "total_flos": 609811340034048000, "step": 158800 }, { "loss": 4.493125, "learning_rate": 8.792234285459361e-06, "epoch": 0.6392539757252454, "total_flos": 610208127008071680, "step": 158900 }, { "loss": 4.443125, "learning_rate": 8.79142316006343e-06, "epoch": 0.6396562752694401, "total_flos": 610568404502937600, "step": 159000 }, { "loss": 4.51875, "learning_rate": 8.790612034667501e-06, "epoch": 0.6400585748136347, "total_flos": 610938762735575040, "step": 159100 }, { "loss": 4.488125, "learning_rate": 8.78980090927157e-06, "epoch": 0.6404608743578294, "total_flos": 611328013056860160, "step": 159200 }, { "loss": 4.51625, "learning_rate": 8.788989783875638e-06, "epoch": 0.640863173902024, "total_flos": 611715861210193920, "step": 159300 }, { "loss": 4.526875, "learning_rate": 8.788178658479708e-06, "epoch": 0.6412654734462185, "total_flos": 612105897595330560, "step": 159400 }, { "loss": 4.51125, "learning_rate": 8.787367533083778e-06, "epoch": 0.6416677729904132, "total_flos": 612480313617039360, "step": 159500 }, { "loss": 4.4825, "learning_rate": 8.786556407687848e-06, "epoch": 0.6420700725346078, "total_flos": 612860136483348480, "step": 159600 }, { "loss": 4.549375, "learning_rate": 8.785745282291917e-06, "epoch": 0.6424723720788025, "total_flos": 613239114862141440, "step": 159700 }, { "loss": 4.48875, "learning_rate": 8.784934156895985e-06, "epoch": 0.642874671622997, "total_flos": 613648675373752320, "step": 159800 }, { "loss": 4.4275, "learning_rate": 8.784123031500057e-06, "epoch": 0.6432769711671916, "total_flos": 614034744260935680, "step": 159900 }, { "loss": 4.4575, "learning_rate": 8.783311906104125e-06, "epoch": 0.6436792707113863, "total_flos": 614419166663024640, "step": 160000 }, { "loss": 4.436875, "learning_rate": 8.782500780708193e-06, "epoch": 0.6440815702555809, "total_flos": 614799669368340480, "step": 160100 }, { "loss": 4.404375, "learning_rate": 8.781689655312263e-06, "epoch": 0.6444838697997756, "total_flos": 615192010832609280, "step": 160200 }, { "loss": 4.485625, "learning_rate": 8.780878529916333e-06, "epoch": 0.6448861693439701, "total_flos": 615568365457735680, "step": 160300 }, { "loss": 4.48, "learning_rate": 8.780067404520403e-06, "epoch": 0.6452884688881647, "total_flos": 615954444967403520, "step": 160400 }, { "loss": 4.46875, "learning_rate": 8.779256279124472e-06, "epoch": 0.6456907684323594, "total_flos": 616360229185781760, "step": 160500 }, { "loss": 4.4525, "learning_rate": 8.77844515372854e-06, "epoch": 0.646093067976554, "total_flos": 616744115152404480, "step": 160600 }, { "loss": 4.491875, "learning_rate": 8.777634028332612e-06, "epoch": 0.6464953675207487, "total_flos": 617121176172748800, "step": 160700 }, { "loss": 4.46125, "learning_rate": 8.77682290293668e-06, "epoch": 0.6468976670649432, "total_flos": 617507861164032000, "step": 160800 }, { "loss": 4.504375, "learning_rate": 8.776011777540749e-06, "epoch": 0.6472999666091378, "total_flos": 617888066439782400, "step": 160900 }, { "loss": 4.4625, "learning_rate": 8.775200652144819e-06, "epoch": 0.6477022661533325, "total_flos": 618269519857459200, "step": 161000 }, { "loss": 4.489375, "learning_rate": 8.774389526748889e-06, "epoch": 0.6481045656975271, "total_flos": 618646426851778560, "step": 161100 }, { "loss": 4.505, "learning_rate": 8.773578401352959e-06, "epoch": 0.6485068652417216, "total_flos": 619033095909335040, "step": 161200 }, { "loss": 4.46, "learning_rate": 8.772767275957027e-06, "epoch": 0.6489091647859163, "total_flos": 619427221950996480, "step": 161300 }, { "loss": 4.48375, "learning_rate": 8.771956150561095e-06, "epoch": 0.6493114643301109, "total_flos": 619813731671285760, "step": 161400 }, { "loss": 4.528125, "learning_rate": 8.771145025165167e-06, "epoch": 0.6497137638743056, "total_flos": 620202721741701120, "step": 161500 }, { "loss": 4.455625, "learning_rate": 8.770333899769235e-06, "epoch": 0.6501160634185001, "total_flos": 620576048958750720, "step": 161600 }, { "loss": 4.438125, "learning_rate": 8.769522774373306e-06, "epoch": 0.6505183629626947, "total_flos": 620968804699914240, "step": 161700 }, { "loss": 4.4475, "learning_rate": 8.768711648977376e-06, "epoch": 0.6509206625068894, "total_flos": 621344638823301120, "step": 161800 }, { "loss": 4.413125, "learning_rate": 8.767900523581444e-06, "epoch": 0.651322962051084, "total_flos": 621725800122654720, "step": 161900 }, { "loss": 4.468125, "learning_rate": 8.767089398185514e-06, "epoch": 0.6517252615952787, "total_flos": 622095531628707840, "step": 162000 }, { "loss": 4.43875, "learning_rate": 8.766278272789582e-06, "epoch": 0.6521275611394732, "total_flos": 622488239568691200, "step": 162100 }, { "loss": 4.465625, "learning_rate": 8.765467147393652e-06, "epoch": 0.6525298606836678, "total_flos": 622868343930839040, "step": 162200 }, { "loss": 4.420625, "learning_rate": 8.764656021997722e-06, "epoch": 0.6529321602278625, "total_flos": 623251645660815360, "step": 162300 }, { "loss": 4.425625, "learning_rate": 8.76384489660179e-06, "epoch": 0.6533344597720571, "total_flos": 623644789122662400, "step": 162400 }, { "loss": 4.4425, "learning_rate": 8.76303377120586e-06, "epoch": 0.6537367593162517, "total_flos": 624021706739466240, "step": 162500 }, { "loss": 4.475, "learning_rate": 8.76222264580993e-06, "epoch": 0.6541390588604463, "total_flos": 624406012294225920, "step": 162600 }, { "loss": 4.41, "learning_rate": 8.761411520414e-06, "epoch": 0.6545413584046409, "total_flos": 624798162553774080, "step": 162700 }, { "loss": 4.463125, "learning_rate": 8.76060039501807e-06, "epoch": 0.6549436579488356, "total_flos": 625182882385428480, "step": 162800 }, { "loss": 4.473125, "learning_rate": 8.759789269622138e-06, "epoch": 0.6553459574930302, "total_flos": 625563894969999360, "step": 162900 }, { "loss": 4.476875, "learning_rate": 8.758978144226208e-06, "epoch": 0.6557482570372247, "total_flos": 625942151019847680, "step": 163000 }, { "loss": 4.445625, "learning_rate": 8.758167018830278e-06, "epoch": 0.6561505565814194, "total_flos": 626310634383974400, "step": 163100 }, { "loss": 4.425, "learning_rate": 8.757355893434346e-06, "epoch": 0.656552856125614, "total_flos": 626701653348925440, "step": 163200 }, { "loss": 4.405625, "learning_rate": 8.756544768038416e-06, "epoch": 0.6569551556698087, "total_flos": 627091307324620800, "step": 163300 }, { "loss": 4.449375, "learning_rate": 8.755733642642486e-06, "epoch": 0.6573574552140032, "total_flos": 627477747998760960, "step": 163400 }, { "loss": 4.446875, "learning_rate": 8.754922517246554e-06, "epoch": 0.6577597547581978, "total_flos": 627883484415959040, "step": 163500 }, { "loss": 4.4625, "learning_rate": 8.754111391850624e-06, "epoch": 0.6581620543023925, "total_flos": 628264109279846400, "step": 163600 }, { "loss": 4.395625, "learning_rate": 8.753300266454693e-06, "epoch": 0.6585643538465871, "total_flos": 628650666801315840, "step": 163700 }, { "loss": 4.440625, "learning_rate": 8.752489141058763e-06, "epoch": 0.6589666533907818, "total_flos": 629046147209748480, "step": 163800 }, { "loss": 4.429375, "learning_rate": 8.751678015662833e-06, "epoch": 0.6593689529349763, "total_flos": 629425630156554240, "step": 163900 }, { "loss": 4.350625, "learning_rate": 8.750866890266901e-06, "epoch": 0.6597712524791709, "total_flos": 629820951227719680, "step": 164000 }, { "loss": 4.429375, "learning_rate": 8.750055764870971e-06, "epoch": 0.6601735520233656, "total_flos": 630211545293291520, "step": 164100 }, { "loss": 4.43875, "learning_rate": 8.749244639475041e-06, "epoch": 0.6605758515675602, "total_flos": 630592446341775360, "step": 164200 }, { "loss": 4.43875, "learning_rate": 8.74843351407911e-06, "epoch": 0.6609781511117548, "total_flos": 630984214191882240, "step": 164300 }, { "loss": 4.39, "learning_rate": 8.74762238868318e-06, "epoch": 0.6613804506559494, "total_flos": 631365072750428160, "step": 164400 }, { "loss": 4.450625, "learning_rate": 8.746811263287248e-06, "epoch": 0.661782750200144, "total_flos": 631735059196108800, "step": 164500 }, { "loss": 4.420625, "learning_rate": 8.746000137891318e-06, "epoch": 0.6621850497443387, "total_flos": 632128197346713600, "step": 164600 }, { "loss": 4.433125, "learning_rate": 8.745189012495388e-06, "epoch": 0.6625873492885332, "total_flos": 632507584691159040, "step": 164700 }, { "loss": 4.399375, "learning_rate": 8.744377887099457e-06, "epoch": 0.6629896488327278, "total_flos": 632885044054671360, "step": 164800 }, { "loss": 4.4425, "learning_rate": 8.743566761703527e-06, "epoch": 0.6633919483769225, "total_flos": 633263825917501440, "step": 164900 }, { "loss": 4.399375, "learning_rate": 8.742755636307597e-06, "epoch": 0.6637942479211171, "total_flos": 633662434647613440, "step": 165000 }, { "loss": 4.40375, "learning_rate": 8.741944510911665e-06, "epoch": 0.6641965474653118, "total_flos": 634031852790374400, "step": 165100 }, { "loss": 4.4375, "learning_rate": 8.741133385515735e-06, "epoch": 0.6645988470095063, "total_flos": 634417257772277760, "step": 165200 }, { "loss": 4.475625, "learning_rate": 8.740322260119803e-06, "epoch": 0.6650011465537009, "total_flos": 634798907705917440, "step": 165300 }, { "loss": 4.38375, "learning_rate": 8.739511134723873e-06, "epoch": 0.6654034460978956, "total_flos": 635178465010114560, "step": 165400 }, { "loss": 4.42, "learning_rate": 8.738700009327943e-06, "epoch": 0.6658057456420902, "total_flos": 635559987473940480, "step": 165500 }, { "loss": 4.42875, "learning_rate": 8.737888883932012e-06, "epoch": 0.6662080451862848, "total_flos": 635921475932037120, "step": 165600 }, { "loss": 4.42, "learning_rate": 8.737077758536082e-06, "epoch": 0.6666103447304794, "total_flos": 636300804852817920, "step": 165700 }, { "loss": 4.42375, "learning_rate": 8.736266633140152e-06, "epoch": 0.667012644274674, "total_flos": 636682024575836160, "step": 165800 }, { "loss": 4.394375, "learning_rate": 8.73545550774422e-06, "epoch": 0.6674149438188687, "total_flos": 637078885907251200, "step": 165900 }, { "loss": 4.4075, "learning_rate": 8.73464438234829e-06, "epoch": 0.6678172433630633, "total_flos": 637456626766602240, "step": 166000 }, { "loss": 4.425, "learning_rate": 8.733833256952359e-06, "epoch": 0.6682195429072579, "total_flos": 637842934659686400, "step": 166100 }, { "loss": 4.369375, "learning_rate": 8.733022131556429e-06, "epoch": 0.6686218424514525, "total_flos": 638223235537797120, "step": 166200 }, { "loss": 4.394375, "learning_rate": 8.732211006160499e-06, "epoch": 0.6690241419956471, "total_flos": 638598251729879040, "step": 166300 }, { "loss": 4.433125, "learning_rate": 8.731399880764567e-06, "epoch": 0.6694264415398418, "total_flos": 638987209932840960, "step": 166400 }, { "loss": 4.4625, "learning_rate": 8.730588755368637e-06, "epoch": 0.6698287410840363, "total_flos": 639365062328279040, "step": 166500 }, { "loss": 4.40625, "learning_rate": 8.729777629972707e-06, "epoch": 0.670231040628231, "total_flos": 639748974851112960, "step": 166600 }, { "loss": 4.4025, "learning_rate": 8.728966504576775e-06, "epoch": 0.6706333401724256, "total_flos": 640131586119598080, "step": 166700 }, { "loss": 4.37625, "learning_rate": 8.728155379180846e-06, "epoch": 0.6710356397166202, "total_flos": 640522647574487040, "step": 166800 }, { "loss": 4.358125, "learning_rate": 8.727344253784916e-06, "epoch": 0.6714379392608149, "total_flos": 640898986265886720, "step": 166900 }, { "loss": 4.39875, "learning_rate": 8.726533128388984e-06, "epoch": 0.6718402388050094, "total_flos": 641285331337666560, "step": 167000 }, { "loss": 4.43875, "learning_rate": 8.725722002993054e-06, "epoch": 0.672242538349204, "total_flos": 641673567211683840, "step": 167100 }, { "loss": 4.415, "learning_rate": 8.724910877597122e-06, "epoch": 0.6726448378933987, "total_flos": 642071235851919360, "step": 167200 }, { "loss": 4.43625, "learning_rate": 8.724099752201192e-06, "epoch": 0.6730471374375933, "total_flos": 642434354861383680, "step": 167300 }, { "loss": 4.408125, "learning_rate": 8.723288626805262e-06, "epoch": 0.6734494369817879, "total_flos": 642824035393290240, "step": 167400 }, { "loss": 4.3725, "learning_rate": 8.72247750140933e-06, "epoch": 0.6738517365259825, "total_flos": 643198966605496320, "step": 167500 }, { "loss": 4.36875, "learning_rate": 8.7216663760134e-06, "epoch": 0.6742540360701771, "total_flos": 643573982797578240, "step": 167600 }, { "loss": 4.36875, "learning_rate": 8.72085525061747e-06, "epoch": 0.6746563356143718, "total_flos": 643961406051532800, "step": 167700 }, { "loss": 4.39375, "learning_rate": 8.72004412522154e-06, "epoch": 0.6750586351585663, "total_flos": 644343220633681920, "step": 167800 }, { "loss": 4.373125, "learning_rate": 8.71923299982561e-06, "epoch": 0.675460934702761, "total_flos": 644740490930749440, "step": 167900 }, { "loss": 4.36, "learning_rate": 8.718421874429678e-06, "epoch": 0.6758632342469556, "total_flos": 645129300418928640, "step": 168000 }, { "loss": 4.4175, "learning_rate": 8.717610749033748e-06, "epoch": 0.6762655337911502, "total_flos": 645507705183559680, "step": 168100 }, { "loss": 4.40125, "learning_rate": 8.716799623637818e-06, "epoch": 0.6766678333353449, "total_flos": 645879667411353600, "step": 168200 }, { "loss": 4.398125, "learning_rate": 8.715988498241886e-06, "epoch": 0.6770701328795394, "total_flos": 646254274637783040, "step": 168300 }, { "loss": 4.419375, "learning_rate": 8.715177372845956e-06, "epoch": 0.6774724324237341, "total_flos": 646629609504399360, "step": 168400 }, { "loss": 4.410625, "learning_rate": 8.714366247450026e-06, "epoch": 0.6778747319679287, "total_flos": 647017335499161600, "step": 168500 }, { "loss": 4.37, "learning_rate": 8.713555122054094e-06, "epoch": 0.6782770315121233, "total_flos": 647391900235653120, "step": 168600 }, { "loss": 4.369375, "learning_rate": 8.712743996658164e-06, "epoch": 0.678679331056318, "total_flos": 647759995879096320, "step": 168700 }, { "loss": 4.396875, "learning_rate": 8.711932871262233e-06, "epoch": 0.6790816306005125, "total_flos": 648151354763550720, "step": 168800 }, { "loss": 4.331875, "learning_rate": 8.711121745866303e-06, "epoch": 0.6794839301447071, "total_flos": 648527836858490880, "step": 168900 }, { "loss": 4.360625, "learning_rate": 8.710310620470373e-06, "epoch": 0.6798862296889018, "total_flos": 648908748529459200, "step": 169000 }, { "loss": 4.388125, "learning_rate": 8.709499495074441e-06, "epoch": 0.6802885292330964, "total_flos": 649298768980869120, "step": 169100 }, { "loss": 4.33125, "learning_rate": 8.708688369678511e-06, "epoch": 0.680690828777291, "total_flos": 649673657703137280, "step": 169200 }, { "loss": 4.3925, "learning_rate": 8.707877244282581e-06, "epoch": 0.6810931283214856, "total_flos": 650053199073607680, "step": 169300 }, { "loss": 4.375625, "learning_rate": 8.70706611888665e-06, "epoch": 0.6814954278656802, "total_flos": 650442481262346240, "step": 169400 }, { "loss": 4.39625, "learning_rate": 8.70625499349072e-06, "epoch": 0.6818977274098749, "total_flos": 650827753463193600, "step": 169500 }, { "loss": 4.436875, "learning_rate": 8.705443868094788e-06, "epoch": 0.6823000269540694, "total_flos": 651214735884042240, "step": 169600 }, { "loss": 4.305625, "learning_rate": 8.704632742698858e-06, "epoch": 0.6827023264982641, "total_flos": 651602541547438080, "step": 169700 }, { "loss": 4.379375, "learning_rate": 8.703821617302928e-06, "epoch": 0.6831046260424587, "total_flos": 651997788261212160, "step": 169800 }, { "loss": 4.384375, "learning_rate": 8.703010491906997e-06, "epoch": 0.6835069255866533, "total_flos": 652376517011619840, "step": 169900 }, { "loss": 4.399375, "learning_rate": 8.702199366511067e-06, "epoch": 0.683909225130848, "total_flos": 652759994012590080, "step": 170000 }, { "loss": 4.38125, "learning_rate": 8.701388241115137e-06, "epoch": 0.6843115246750425, "total_flos": 653162740200407040, "step": 170100 }, { "loss": 4.39375, "learning_rate": 8.700577115719205e-06, "epoch": 0.6847138242192372, "total_flos": 653533911053107200, "step": 170200 }, { "loss": 4.35375, "learning_rate": 8.699765990323275e-06, "epoch": 0.6851161237634318, "total_flos": 653936805955706880, "step": 170300 }, { "loss": 4.36125, "learning_rate": 8.698954864927343e-06, "epoch": 0.6855184233076264, "total_flos": 654324696598978560, "step": 170400 }, { "loss": 4.32625, "learning_rate": 8.698143739531413e-06, "epoch": 0.685920722851821, "total_flos": 654695309771243520, "step": 170500 }, { "loss": 4.39875, "learning_rate": 8.697332614135483e-06, "epoch": 0.6863230223960156, "total_flos": 655082876428738560, "step": 170600 }, { "loss": 4.36625, "learning_rate": 8.696521488739552e-06, "epoch": 0.6867253219402102, "total_flos": 655468318589337600, "step": 170700 }, { "loss": 4.3025, "learning_rate": 8.695710363343622e-06, "epoch": 0.6871276214844049, "total_flos": 655867458443673600, "step": 170800 }, { "loss": 4.380625, "learning_rate": 8.694899237947692e-06, "epoch": 0.6875299210285994, "total_flos": 656247695586877440, "step": 170900 }, { "loss": 4.383125, "learning_rate": 8.69408811255176e-06, "epoch": 0.6879322205727941, "total_flos": 656627040441384960, "step": 171000 }, { "loss": 4.3175, "learning_rate": 8.69327698715583e-06, "epoch": 0.6883345201169887, "total_flos": 657029547623301120, "step": 171100 }, { "loss": 4.3775, "learning_rate": 8.6924658617599e-06, "epoch": 0.6887368196611833, "total_flos": 657407888653025280, "step": 171200 }, { "loss": 4.320625, "learning_rate": 8.691654736363969e-06, "epoch": 0.689139119205378, "total_flos": 657798174666547200, "step": 171300 }, { "loss": 4.363125, "learning_rate": 8.690843610968039e-06, "epoch": 0.6895414187495725, "total_flos": 658178018777825280, "step": 171400 }, { "loss": 4.311875, "learning_rate": 8.690032485572107e-06, "epoch": 0.6899437182937672, "total_flos": 658556933421711360, "step": 171500 }, { "loss": 4.335, "learning_rate": 8.689221360176177e-06, "epoch": 0.6903460178379618, "total_flos": 658953396409958400, "step": 171600 }, { "loss": 4.311875, "learning_rate": 8.688410234780247e-06, "epoch": 0.6907483173821564, "total_flos": 659339608700682240, "step": 171700 }, { "loss": 4.351875, "learning_rate": 8.687599109384315e-06, "epoch": 0.691150616926351, "total_flos": 659735115665326080, "step": 171800 }, { "loss": 4.36125, "learning_rate": 8.686787983988386e-06, "epoch": 0.6915529164705456, "total_flos": 660118560798842880, "step": 171900 }, { "loss": 4.388125, "learning_rate": 8.685976858592456e-06, "epoch": 0.6919552160147403, "total_flos": 660502016554844160, "step": 172000 }, { "loss": 4.3475, "learning_rate": 8.685165733196524e-06, "epoch": 0.6923575155589349, "total_flos": 660888175733145600, "step": 172100 }, { "loss": 4.384375, "learning_rate": 8.684354607800594e-06, "epoch": 0.6927598151031295, "total_flos": 661252941227704320, "step": 172200 }, { "loss": 4.338125, "learning_rate": 8.683543482404662e-06, "epoch": 0.6931621146473241, "total_flos": 661643253797437440, "step": 172300 }, { "loss": 4.363125, "learning_rate": 8.682732357008732e-06, "epoch": 0.6935644141915187, "total_flos": 662009448016158720, "step": 172400 }, { "loss": 4.335625, "learning_rate": 8.681921231612802e-06, "epoch": 0.6939667137357134, "total_flos": 662402262180986880, "step": 172500 }, { "loss": 4.356875, "learning_rate": 8.68111010621687e-06, "epoch": 0.694369013279908, "total_flos": 662797158352773120, "step": 172600 }, { "loss": 4.281875, "learning_rate": 8.68029898082094e-06, "epoch": 0.6947713128241025, "total_flos": 663178686127841280, "step": 172700 }, { "loss": 4.335, "learning_rate": 8.67948785542501e-06, "epoch": 0.6951736123682972, "total_flos": 663578782005780480, "step": 172800 }, { "loss": 4.326875, "learning_rate": 8.67867673002908e-06, "epoch": 0.6955759119124918, "total_flos": 663952109222830080, "step": 172900 }, { "loss": 4.374375, "learning_rate": 8.67786560463315e-06, "epoch": 0.6959782114566864, "total_flos": 664340685016350720, "step": 173000 }, { "loss": 4.33875, "learning_rate": 8.677054479237218e-06, "epoch": 0.696380511000881, "total_flos": 664747090650071040, "step": 173100 }, { "loss": 4.324375, "learning_rate": 8.676243353841288e-06, "epoch": 0.6967828105450756, "total_flos": 665129840010854400, "step": 173200 }, { "loss": 4.338125, "learning_rate": 8.675432228445358e-06, "epoch": 0.6971851100892703, "total_flos": 665527513962332160, "step": 173300 }, { "loss": 4.348125, "learning_rate": 8.674621103049426e-06, "epoch": 0.6975874096334649, "total_flos": 665924035374243840, "step": 173400 }, { "loss": 4.32125, "learning_rate": 8.673809977653496e-06, "epoch": 0.6979897091776595, "total_flos": 666297829980610560, "step": 173500 }, { "loss": 4.285625, "learning_rate": 8.672998852257566e-06, "epoch": 0.6983920087218541, "total_flos": 666685332903198720, "step": 173600 }, { "loss": 4.313125, "learning_rate": 8.672187726861634e-06, "epoch": 0.6987943082660487, "total_flos": 667061124536647680, "step": 173700 }, { "loss": 4.335, "learning_rate": 8.671376601465704e-06, "epoch": 0.6991966078102434, "total_flos": 667457545034956800, "step": 173800 }, { "loss": 4.32375, "learning_rate": 8.670565476069773e-06, "epoch": 0.699598907354438, "total_flos": 667830452663869440, "step": 173900 }, { "loss": 4.296875, "learning_rate": 8.669754350673843e-06, "epoch": 0.7000012068986325, "total_flos": 668189991896064000, "step": 174000 }, { "loss": 4.280625, "learning_rate": 8.668943225277913e-06, "epoch": 0.7004035064428272, "total_flos": 668586375215677440, "step": 174100 }, { "loss": 4.28125, "learning_rate": 8.668132099881981e-06, "epoch": 0.7008058059870218, "total_flos": 668965969698570240, "step": 174200 }, { "loss": 4.3, "learning_rate": 8.667320974486051e-06, "epoch": 0.7012081055312165, "total_flos": 669350535504199680, "step": 174300 }, { "loss": 4.2675, "learning_rate": 8.666509849090121e-06, "epoch": 0.7016104050754111, "total_flos": 669721143365222400, "step": 174400 }, { "loss": 4.34, "learning_rate": 8.66569872369419e-06, "epoch": 0.7020127046196056, "total_flos": 670112635030732800, "step": 174500 }, { "loss": 4.34625, "learning_rate": 8.66488759829826e-06, "epoch": 0.7024150041638003, "total_flos": 670484060823060480, "step": 174600 }, { "loss": 4.270625, "learning_rate": 8.664076472902328e-06, "epoch": 0.7028173037079949, "total_flos": 670865636399308800, "step": 174700 }, { "loss": 4.328125, "learning_rate": 8.663265347506398e-06, "epoch": 0.7032196032521895, "total_flos": 671253038408294400, "step": 174800 }, { "loss": 4.276875, "learning_rate": 8.662454222110468e-06, "epoch": 0.7036219027963841, "total_flos": 671645826016911360, "step": 174900 }, { "loss": 4.385, "learning_rate": 8.661643096714537e-06, "epoch": 0.7040242023405787, "total_flos": 672025404566077440, "step": 175000 }, { "loss": 4.30875, "learning_rate": 8.660831971318607e-06, "epoch": 0.7044265018847734, "total_flos": 672403936800522240, "step": 175100 }, { "loss": 4.294375, "learning_rate": 8.660020845922677e-06, "epoch": 0.704828801428968, "total_flos": 672782904556830720, "step": 175200 }, { "loss": 4.320625, "learning_rate": 8.659209720526745e-06, "epoch": 0.7052311009731626, "total_flos": 673171352880537600, "step": 175300 }, { "loss": 4.33375, "learning_rate": 8.658398595130815e-06, "epoch": 0.7056334005173572, "total_flos": 673549460215603200, "step": 175400 }, { "loss": 4.35125, "learning_rate": 8.657587469734883e-06, "epoch": 0.7060357000615518, "total_flos": 673931572227317760, "step": 175500 }, { "loss": 4.361875, "learning_rate": 8.656776344338953e-06, "epoch": 0.7064379996057465, "total_flos": 674329931329044480, "step": 175600 }, { "loss": 4.303125, "learning_rate": 8.655965218943023e-06, "epoch": 0.7068402991499411, "total_flos": 674714029745356800, "step": 175700 }, { "loss": 4.284375, "learning_rate": 8.655154093547092e-06, "epoch": 0.7072425986941356, "total_flos": 675106360587141120, "step": 175800 }, { "loss": 4.290625, "learning_rate": 8.654342968151162e-06, "epoch": 0.7076448982383303, "total_flos": 675519357472481280, "step": 175900 }, { "loss": 4.256875, "learning_rate": 8.653531842755232e-06, "epoch": 0.7080471977825249, "total_flos": 675913775632465920, "step": 176000 }, { "loss": 4.265625, "learning_rate": 8.6527207173593e-06, "epoch": 0.7084494973267196, "total_flos": 676282227129139200, "step": 176100 }, { "loss": 4.338125, "learning_rate": 8.65190959196337e-06, "epoch": 0.7088517968709142, "total_flos": 676651496557117440, "step": 176200 }, { "loss": 4.275, "learning_rate": 8.65109846656744e-06, "epoch": 0.7092540964151087, "total_flos": 677019512531927040, "step": 176300 }, { "loss": 4.328125, "learning_rate": 8.650287341171509e-06, "epoch": 0.7096563959593034, "total_flos": 677403924311531520, "step": 176400 }, { "loss": 4.2975, "learning_rate": 8.649476215775579e-06, "epoch": 0.710058695503498, "total_flos": 677790688971448320, "step": 176500 }, { "loss": 4.31625, "learning_rate": 8.648665090379647e-06, "epoch": 0.7104609950476926, "total_flos": 678183391600189440, "step": 176600 }, { "loss": 4.315625, "learning_rate": 8.647853964983717e-06, "epoch": 0.7108632945918872, "total_flos": 678567266944327680, "step": 176700 }, { "loss": 4.29875, "learning_rate": 8.647042839587787e-06, "epoch": 0.7112655941360818, "total_flos": 678953383632691200, "step": 176800 }, { "loss": 4.38625, "learning_rate": 8.646231714191855e-06, "epoch": 0.7116678936802765, "total_flos": 679345215217704960, "step": 176900 }, { "loss": 4.318125, "learning_rate": 8.645420588795926e-06, "epoch": 0.7120701932244711, "total_flos": 679748173855211520, "step": 177000 }, { "loss": 4.261875, "learning_rate": 8.644609463399996e-06, "epoch": 0.7124724927686656, "total_flos": 680105429253242880, "step": 177100 }, { "loss": 4.330625, "learning_rate": 8.643798338004064e-06, "epoch": 0.7128747923128603, "total_flos": 680516907123302400, "step": 177200 }, { "loss": 4.30125, "learning_rate": 8.642987212608134e-06, "epoch": 0.7132770918570549, "total_flos": 680900840891105280, "step": 177300 }, { "loss": 4.275625, "learning_rate": 8.642176087212202e-06, "epoch": 0.7136793914012496, "total_flos": 681292369735311360, "step": 177400 }, { "loss": 4.380625, "learning_rate": 8.641364961816272e-06, "epoch": 0.7140816909454442, "total_flos": 681681593500385280, "step": 177500 }, { "loss": 4.245625, "learning_rate": 8.640553836420342e-06, "epoch": 0.7144839904896387, "total_flos": 682079713596211200, "step": 177600 }, { "loss": 4.300625, "learning_rate": 8.63974271102441e-06, "epoch": 0.7148862900338334, "total_flos": 682449747843072000, "step": 177700 }, { "loss": 4.299375, "learning_rate": 8.63893158562848e-06, "epoch": 0.715288589578028, "total_flos": 682849413510389760, "step": 177800 }, { "loss": 4.268125, "learning_rate": 8.63812046023255e-06, "epoch": 0.7156908891222227, "total_flos": 683227149058498560, "step": 177900 }, { "loss": 4.338125, "learning_rate": 8.63730933483662e-06, "epoch": 0.7160931886664172, "total_flos": 683607338400522240, "step": 178000 }, { "loss": 4.28875, "learning_rate": 8.63649820944069e-06, "epoch": 0.7164954882106118, "total_flos": 684001740626780160, "step": 178100 }, { "loss": 4.2625, "learning_rate": 8.635687084044758e-06, "epoch": 0.7168977877548065, "total_flos": 684398081456455680, "step": 178200 }, { "loss": 4.28125, "learning_rate": 8.634875958648828e-06, "epoch": 0.7173000872990011, "total_flos": 684790656615383040, "step": 178300 }, { "loss": 4.251875, "learning_rate": 8.634064833252898e-06, "epoch": 0.7177023868431958, "total_flos": 685170458236723200, "step": 178400 }, { "loss": 4.29625, "learning_rate": 8.633253707856966e-06, "epoch": 0.7181046863873903, "total_flos": 685568148121927680, "step": 178500 }, { "loss": 4.27625, "learning_rate": 8.632442582461036e-06, "epoch": 0.7185069859315849, "total_flos": 685965418418995200, "step": 178600 }, { "loss": 4.25625, "learning_rate": 8.631631457065106e-06, "epoch": 0.7189092854757796, "total_flos": 686335951922626560, "step": 178700 }, { "loss": 4.31875, "learning_rate": 8.630820331669174e-06, "epoch": 0.7193115850199742, "total_flos": 686717522187632640, "step": 178800 }, { "loss": 4.28, "learning_rate": 8.630009206273244e-06, "epoch": 0.7197138845641687, "total_flos": 687086218001448960, "step": 178900 }, { "loss": 4.28, "learning_rate": 8.629198080877313e-06, "epoch": 0.7201161841083634, "total_flos": 687455758302781440, "step": 179000 }, { "loss": 4.27, "learning_rate": 8.628386955481385e-06, "epoch": 0.720518483652558, "total_flos": 687843611767357440, "step": 179100 }, { "loss": 4.3375, "learning_rate": 8.627575830085453e-06, "epoch": 0.7209207831967527, "total_flos": 688231598012989440, "step": 179200 }, { "loss": 4.2975, "learning_rate": 8.626764704689521e-06, "epoch": 0.7213230827409473, "total_flos": 688621262611169280, "step": 179300 }, { "loss": 4.2875, "learning_rate": 8.625953579293591e-06, "epoch": 0.7217253822851418, "total_flos": 689008494660403200, "step": 179400 }, { "loss": 4.27125, "learning_rate": 8.625142453897661e-06, "epoch": 0.7221276818293365, "total_flos": 689409280999833600, "step": 179500 }, { "loss": 4.26375, "learning_rate": 8.62433132850173e-06, "epoch": 0.7225299813735311, "total_flos": 689806620343050240, "step": 179600 }, { "loss": 4.31625, "learning_rate": 8.6235202031058e-06, "epoch": 0.7229322809177258, "total_flos": 690193762101166080, "step": 179700 }, { "loss": 4.24875, "learning_rate": 8.622709077709868e-06, "epoch": 0.7233345804619203, "total_flos": 690580930415493120, "step": 179800 }, { "loss": 4.2725, "learning_rate": 8.62189795231394e-06, "epoch": 0.7237368800061149, "total_flos": 690956233414656000, "step": 179900 }, { "loss": 4.29875, "learning_rate": 8.621086826918008e-06, "epoch": 0.7241391795503096, "total_flos": 691344511778611200, "step": 180000 }, { "loss": 4.22125, "learning_rate": 8.620275701522077e-06, "epoch": 0.7245414790945042, "total_flos": 691715379890503680, "step": 180100 }, { "loss": 4.25625, "learning_rate": 8.619464576126147e-06, "epoch": 0.7249437786386989, "total_flos": 692097077625323520, "step": 180200 }, { "loss": 4.25625, "learning_rate": 8.618653450730217e-06, "epoch": 0.7253460781828934, "total_flos": 692484145026048000, "step": 180300 }, { "loss": 4.255, "learning_rate": 8.617842325334285e-06, "epoch": 0.725748377727088, "total_flos": 692860080063037440, "step": 180400 }, { "loss": 4.33, "learning_rate": 8.617031199938355e-06, "epoch": 0.7261506772712827, "total_flos": 693244635246182400, "step": 180500 }, { "loss": 4.23875, "learning_rate": 8.616220074542423e-06, "epoch": 0.7265529768154773, "total_flos": 693618578567331840, "step": 180600 }, { "loss": 4.2975, "learning_rate": 8.615408949146495e-06, "epoch": 0.7269552763596718, "total_flos": 693994311777116160, "step": 180700 }, { "loss": 4.2225, "learning_rate": 8.614597823750563e-06, "epoch": 0.7273575759038665, "total_flos": 694374527675351040, "step": 180800 }, { "loss": 4.30625, "learning_rate": 8.613786698354632e-06, "epoch": 0.7277598754480611, "total_flos": 694780641190748160, "step": 180900 }, { "loss": 4.255, "learning_rate": 8.612975572958704e-06, "epoch": 0.7281621749922558, "total_flos": 695157585363763200, "step": 181000 }, { "loss": 4.265, "learning_rate": 8.612164447562772e-06, "epoch": 0.7285644745364503, "total_flos": 695533212348702720, "step": 181100 }, { "loss": 4.24375, "learning_rate": 8.611353322166842e-06, "epoch": 0.7289667740806449, "total_flos": 695901669156618240, "step": 181200 }, { "loss": 4.32125, "learning_rate": 8.61054219677091e-06, "epoch": 0.7293690736248396, "total_flos": 696282772032307200, "step": 181300 }, { "loss": 4.2575, "learning_rate": 8.60973107137498e-06, "epoch": 0.7297713731690342, "total_flos": 696661484848988160, "step": 181400 }, { "loss": 4.22875, "learning_rate": 8.60891994597905e-06, "epoch": 0.7301736727132289, "total_flos": 697042911710453760, "step": 181500 }, { "loss": 4.24125, "learning_rate": 8.608108820583119e-06, "epoch": 0.7305759722574234, "total_flos": 697427164152791040, "step": 181600 }, { "loss": 4.29, "learning_rate": 8.607297695187187e-06, "epoch": 0.730978271801618, "total_flos": 697818485858549760, "step": 181700 }, { "loss": 4.265, "learning_rate": 8.606486569791259e-06, "epoch": 0.7313805713458127, "total_flos": 698195169780695040, "step": 181800 }, { "loss": 4.29125, "learning_rate": 8.605675444395327e-06, "epoch": 0.7317828708900073, "total_flos": 698578811430174720, "step": 181900 }, { "loss": 4.21625, "learning_rate": 8.604864318999397e-06, "epoch": 0.732185170434202, "total_flos": 698967068549160960, "step": 182000 }, { "loss": 4.2525, "learning_rate": 8.604053193603466e-06, "epoch": 0.7325874699783965, "total_flos": 699356053308334080, "step": 182100 }, { "loss": 4.2525, "learning_rate": 8.603242068207536e-06, "epoch": 0.7329897695225911, "total_flos": 699740337618124800, "step": 182200 }, { "loss": 4.2575, "learning_rate": 8.602430942811606e-06, "epoch": 0.7333920690667858, "total_flos": 700131797416181760, "step": 182300 }, { "loss": 4.265, "learning_rate": 8.601619817415674e-06, "epoch": 0.7337943686109804, "total_flos": 700503547194286080, "step": 182400 }, { "loss": 4.22625, "learning_rate": 8.600808692019742e-06, "epoch": 0.7341966681551749, "total_flos": 700869459917168640, "step": 182500 }, { "loss": 4.2225, "learning_rate": 8.599997566623814e-06, "epoch": 0.7345989676993696, "total_flos": 701236796052971520, "step": 182600 }, { "loss": 4.24125, "learning_rate": 8.599186441227882e-06, "epoch": 0.7350012672435642, "total_flos": 701624129015808000, "step": 182700 }, { "loss": 4.23, "learning_rate": 8.598375315831952e-06, "epoch": 0.7354035667877589, "total_flos": 702017899204239360, "step": 182800 }, { "loss": 4.22125, "learning_rate": 8.59756419043602e-06, "epoch": 0.7358058663319534, "total_flos": 702401110643097600, "step": 182900 }, { "loss": 4.22875, "learning_rate": 8.59675306504009e-06, "epoch": 0.736208165876148, "total_flos": 702778814323752960, "step": 183000 }, { "loss": 4.22375, "learning_rate": 8.595941939644161e-06, "epoch": 0.7366104654203427, "total_flos": 703160713885777920, "step": 183100 }, { "loss": 4.30375, "learning_rate": 8.59513081424823e-06, "epoch": 0.7370127649645373, "total_flos": 703543574782648320, "step": 183200 }, { "loss": 4.19375, "learning_rate": 8.594319688852298e-06, "epoch": 0.737415064508732, "total_flos": 703915483898019840, "step": 183300 }, { "loss": 4.2675, "learning_rate": 8.59350856345637e-06, "epoch": 0.7378173640529265, "total_flos": 704314193541734400, "step": 183400 }, { "loss": 4.24, "learning_rate": 8.592697438060438e-06, "epoch": 0.7382196635971211, "total_flos": 704703581955317760, "step": 183500 }, { "loss": 4.27875, "learning_rate": 8.591886312664508e-06, "epoch": 0.7386219631413158, "total_flos": 705089279055544320, "step": 183600 }, { "loss": 4.2275, "learning_rate": 8.591075187268576e-06, "epoch": 0.7390242626855104, "total_flos": 705472267422228480, "step": 183700 }, { "loss": 4.2175, "learning_rate": 8.590264061872646e-06, "epoch": 0.739426562229705, "total_flos": 705848282127851520, "step": 183800 }, { "loss": 4.24625, "learning_rate": 8.589452936476716e-06, "epoch": 0.7398288617738996, "total_flos": 706236533935595520, "step": 183900 }, { "loss": 4.255, "learning_rate": 8.588641811080784e-06, "epoch": 0.7402311613180942, "total_flos": 706605968012083200, "step": 184000 }, { "loss": 4.2225, "learning_rate": 8.587830685684855e-06, "epoch": 0.7406334608622889, "total_flos": 706995085552312320, "step": 184100 }, { "loss": 4.1975, "learning_rate": 8.587019560288925e-06, "epoch": 0.7410357604064834, "total_flos": 707371461422407680, "step": 184200 }, { "loss": 4.21, "learning_rate": 8.586208434892993e-06, "epoch": 0.7414380599506781, "total_flos": 707754014267228160, "step": 184300 }, { "loss": 4.22375, "learning_rate": 8.585397309497063e-06, "epoch": 0.7418403594948727, "total_flos": 708144624266526720, "step": 184400 }, { "loss": 4.22, "learning_rate": 8.584586184101131e-06, "epoch": 0.7422426590390673, "total_flos": 708536328381726720, "step": 184500 }, { "loss": 4.26375, "learning_rate": 8.583775058705201e-06, "epoch": 0.742644958583262, "total_flos": 708927049917112320, "step": 184600 }, { "loss": 4.18125, "learning_rate": 8.582963933309271e-06, "epoch": 0.7430472581274565, "total_flos": 709309910813982720, "step": 184700 }, { "loss": 4.1975, "learning_rate": 8.58215280791334e-06, "epoch": 0.7434495576716511, "total_flos": 709669901501767680, "step": 184800 }, { "loss": 4.2375, "learning_rate": 8.58134168251741e-06, "epoch": 0.7438518572158458, "total_flos": 710059815728332800, "step": 184900 }, { "loss": 4.21875, "learning_rate": 8.58053055712148e-06, "epoch": 0.7442541567600404, "total_flos": 710451004653035520, "step": 185000 }, { "loss": 4.2475, "learning_rate": 8.579719431725548e-06, "epoch": 0.744656456304235, "total_flos": 710836829223075840, "step": 185100 }, { "loss": 4.22125, "learning_rate": 8.578908306329618e-06, "epoch": 0.7450587558484296, "total_flos": 711230227624550400, "step": 185200 }, { "loss": 4.2525, "learning_rate": 8.578097180933687e-06, "epoch": 0.7454610553926242, "total_flos": 711624826366771200, "step": 185300 }, { "loss": 4.24375, "learning_rate": 8.577286055537757e-06, "epoch": 0.7458633549368189, "total_flos": 712009216901406720, "step": 185400 }, { "loss": 4.24375, "learning_rate": 8.576474930141827e-06, "epoch": 0.7462656544810135, "total_flos": 712387998764236800, "step": 185500 }, { "loss": 4.2875, "learning_rate": 8.575663804745895e-06, "epoch": 0.7466679540252081, "total_flos": 712768921057689600, "step": 185600 }, { "loss": 4.21125, "learning_rate": 8.574852679349965e-06, "epoch": 0.7470702535694027, "total_flos": 713155781319966720, "step": 185700 }, { "loss": 4.2725, "learning_rate": 8.574041553954035e-06, "epoch": 0.7474725531135973, "total_flos": 713553380914053120, "step": 185800 }, { "loss": 4.23125, "learning_rate": 8.573230428558103e-06, "epoch": 0.747874852657792, "total_flos": 713949620830126080, "step": 185900 }, { "loss": 4.2475, "learning_rate": 8.572419303162173e-06, "epoch": 0.7482771522019865, "total_flos": 714321981401088000, "step": 186000 }, { "loss": 4.27, "learning_rate": 8.571608177766244e-06, "epoch": 0.7486794517461812, "total_flos": 714683528282849280, "step": 186100 }, { "loss": 4.17375, "learning_rate": 8.570797052370312e-06, "epoch": 0.7490817512903758, "total_flos": 715069687461150720, "step": 186200 }, { "loss": 4.2225, "learning_rate": 8.569985926974382e-06, "epoch": 0.7494840508345704, "total_flos": 715458937782435840, "step": 186300 }, { "loss": 4.14625, "learning_rate": 8.56917480157845e-06, "epoch": 0.749886350378765, "total_flos": 715838187034583040, "step": 186400 }, { "loss": 4.1725, "learning_rate": 8.56836367618252e-06, "epoch": 0.7502886499229596, "total_flos": 716214281408839680, "step": 186500 }, { "loss": 4.2375, "learning_rate": 8.56755255078659e-06, "epoch": 0.7506909494671542, "total_flos": 716603797292236800, "step": 186600 }, { "loss": 4.24, "learning_rate": 8.566741425390659e-06, "epoch": 0.7510932490113489, "total_flos": 717000148744396800, "step": 186700 }, { "loss": 4.20625, "learning_rate": 8.565930299994729e-06, "epoch": 0.7514955485555435, "total_flos": 717389314085806080, "step": 186800 }, { "loss": 4.17125, "learning_rate": 8.565119174598799e-06, "epoch": 0.7518978480997381, "total_flos": 717773577150627840, "step": 186900 }, { "loss": 4.2225, "learning_rate": 8.564308049202867e-06, "epoch": 0.7523001476439327, "total_flos": 718163257682534400, "step": 187000 }, { "loss": 4.2025, "learning_rate": 8.563496923806937e-06, "epoch": 0.7527024471881273, "total_flos": 718517214799134720, "step": 187100 }, { "loss": 4.2175, "learning_rate": 8.562685798411006e-06, "epoch": 0.753104746732322, "total_flos": 718904563695697920, "step": 187200 }, { "loss": 4.195, "learning_rate": 8.561874673015076e-06, "epoch": 0.7535070462765165, "total_flos": 719291577984000000, "step": 187300 }, { "loss": 4.21875, "learning_rate": 8.561063547619146e-06, "epoch": 0.7539093458207112, "total_flos": 719679835102986240, "step": 187400 }, { "loss": 4.20875, "learning_rate": 8.560252422223214e-06, "epoch": 0.7543116453649058, "total_flos": 720063535176130560, "step": 187500 }, { "loss": 4.24375, "learning_rate": 8.559441296827284e-06, "epoch": 0.7547139449091004, "total_flos": 720450512285736960, "step": 187600 }, { "loss": 4.21375, "learning_rate": 8.558630171431354e-06, "epoch": 0.7551162444532951, "total_flos": 720834318583726080, "step": 187700 }, { "loss": 4.215, "learning_rate": 8.557819046035422e-06, "epoch": 0.7555185439974896, "total_flos": 721222660682588160, "step": 187800 }, { "loss": 4.19625, "learning_rate": 8.557007920639492e-06, "epoch": 0.7559208435416843, "total_flos": 721597570649825280, "step": 187900 }, { "loss": 4.15125, "learning_rate": 8.55619679524356e-06, "epoch": 0.7563231430858789, "total_flos": 721968645900165120, "step": 188000 }, { "loss": 4.195, "learning_rate": 8.55538566984763e-06, "epoch": 0.7567254426300735, "total_flos": 722363265887354880, "step": 188100 }, { "loss": 4.2225, "learning_rate": 8.554574544451701e-06, "epoch": 0.7571277421742681, "total_flos": 722745972758200320, "step": 188200 }, { "loss": 4.175, "learning_rate": 8.55376341905577e-06, "epoch": 0.7575300417184627, "total_flos": 723121626299351040, "step": 188300 }, { "loss": 4.20875, "learning_rate": 8.55295229365984e-06, "epoch": 0.7579323412626573, "total_flos": 723507424313180160, "step": 188400 }, { "loss": 4.25875, "learning_rate": 8.55214116826391e-06, "epoch": 0.758334640806852, "total_flos": 723892335349555200, "step": 188500 }, { "loss": 4.15375, "learning_rate": 8.551330042867978e-06, "epoch": 0.7587369403510466, "total_flos": 724282180529971200, "step": 188600 }, { "loss": 4.22, "learning_rate": 8.550518917472048e-06, "epoch": 0.7591392398952412, "total_flos": 724673932446351360, "step": 188700 }, { "loss": 4.18125, "learning_rate": 8.549707792076116e-06, "epoch": 0.7595415394394358, "total_flos": 725062242677760000, "step": 188800 }, { "loss": 4.23375, "learning_rate": 8.548896666680186e-06, "epoch": 0.7599438389836304, "total_flos": 725460538044579840, "step": 188900 }, { "loss": 4.18625, "learning_rate": 8.548085541284256e-06, "epoch": 0.7603461385278251, "total_flos": 725829223235911680, "step": 189000 }, { "loss": 4.135, "learning_rate": 8.547274415888324e-06, "epoch": 0.7607484380720196, "total_flos": 726209725941227520, "step": 189100 }, { "loss": 4.185, "learning_rate": 8.546463290492395e-06, "epoch": 0.7611507376162143, "total_flos": 726597876835368960, "step": 189200 }, { "loss": 4.1575, "learning_rate": 8.545652165096465e-06, "epoch": 0.7615530371604089, "total_flos": 726973726892482560, "step": 189300 }, { "loss": 4.19625, "learning_rate": 8.544841039700533e-06, "epoch": 0.7619553367046035, "total_flos": 727372441847439360, "step": 189400 }, { "loss": 4.2425, "learning_rate": 8.544029914304603e-06, "epoch": 0.7623576362487982, "total_flos": 727756758024683520, "step": 189500 }, { "loss": 4.21375, "learning_rate": 8.543218788908671e-06, "epoch": 0.7627599357929927, "total_flos": 728149110111436800, "step": 189600 }, { "loss": 4.19, "learning_rate": 8.542407663512741e-06, "epoch": 0.7631622353371874, "total_flos": 728527562677248000, "step": 189700 }, { "loss": 4.1625, "learning_rate": 8.541596538116811e-06, "epoch": 0.763564534881382, "total_flos": 728915894153625600, "step": 189800 }, { "loss": 4.1825, "learning_rate": 8.54078541272088e-06, "epoch": 0.7639668344255766, "total_flos": 729284797105889280, "step": 189900 }, { "loss": 4.1675, "learning_rate": 8.53997428732495e-06, "epoch": 0.7643691339697712, "total_flos": 729650901033492480, "step": 190000 }, { "loss": 4.20375, "learning_rate": 8.53916316192902e-06, "epoch": 0.7647714335139658, "total_flos": 730030336179118080, "step": 190100 }, { "loss": 4.195, "learning_rate": 8.538352036533088e-06, "epoch": 0.7651737330581605, "total_flos": 730418099352576000, "step": 190200 }, { "loss": 4.20875, "learning_rate": 8.537540911137158e-06, "epoch": 0.7655760326023551, "total_flos": 730798108112363520, "step": 190300 }, { "loss": 4.18875, "learning_rate": 8.536729785741228e-06, "epoch": 0.7659783321465496, "total_flos": 731169990671523840, "step": 190400 }, { "loss": 4.18125, "learning_rate": 8.535918660345297e-06, "epoch": 0.7663806316907443, "total_flos": 731566294322503680, "step": 190500 }, { "loss": 4.16875, "learning_rate": 8.535107534949367e-06, "epoch": 0.7667829312349389, "total_flos": 731930066614763520, "step": 190600 }, { "loss": 4.20375, "learning_rate": 8.534296409553435e-06, "epoch": 0.7671852307791335, "total_flos": 732326710185246720, "step": 190700 }, { "loss": 4.20125, "learning_rate": 8.533485284157505e-06, "epoch": 0.7675875303233282, "total_flos": 732712550689013760, "step": 190800 }, { "loss": 4.16, "learning_rate": 8.532674158761575e-06, "epoch": 0.7679898298675227, "total_flos": 733095061043896320, "step": 190900 }, { "loss": 4.1575, "learning_rate": 8.531863033365643e-06, "epoch": 0.7683921294117174, "total_flos": 733464293293178880, "step": 191000 }, { "loss": 4.185, "learning_rate": 8.531051907969713e-06, "epoch": 0.768794428955912, "total_flos": 733855588442726400, "step": 191100 }, { "loss": 4.12375, "learning_rate": 8.530240782573784e-06, "epoch": 0.7691967285001066, "total_flos": 734227901212508160, "step": 191200 }, { "loss": 4.1875, "learning_rate": 8.529429657177852e-06, "epoch": 0.7695990280443012, "total_flos": 734599204846264320, "step": 191300 }, { "loss": 4.085, "learning_rate": 8.528618531781922e-06, "epoch": 0.7700013275884958, "total_flos": 734971618529648640, "step": 191400 }, { "loss": 4.165, "learning_rate": 8.52780740638599e-06, "epoch": 0.7704036271326905, "total_flos": 735346693145395200, "step": 191500 }, { "loss": 4.10875, "learning_rate": 8.52699628099006e-06, "epoch": 0.7708059266768851, "total_flos": 735716499008839680, "step": 191600 }, { "loss": 4.18875, "learning_rate": 8.52618515559413e-06, "epoch": 0.7712082262210797, "total_flos": 736089900583280640, "step": 191700 }, { "loss": 4.17875, "learning_rate": 8.525374030198199e-06, "epoch": 0.7716105257652743, "total_flos": 736458192742686720, "step": 191800 }, { "loss": 4.21875, "learning_rate": 8.524562904802269e-06, "epoch": 0.7720128253094689, "total_flos": 736835784887255040, "step": 191900 }, { "loss": 4.15375, "learning_rate": 8.523751779406339e-06, "epoch": 0.7724151248536636, "total_flos": 737230468609351680, "step": 192000 }, { "loss": 4.1575, "learning_rate": 8.522940654010407e-06, "epoch": 0.7728174243978582, "total_flos": 737600152314224640, "step": 192100 }, { "loss": 4.18375, "learning_rate": 8.522129528614477e-06, "epoch": 0.7732197239420527, "total_flos": 737983937367244800, "step": 192200 }, { "loss": 4.155, "learning_rate": 8.521318403218546e-06, "epoch": 0.7736220234862474, "total_flos": 738368503172874240, "step": 192300 }, { "loss": 4.22, "learning_rate": 8.520507277822616e-06, "epoch": 0.774024323030442, "total_flos": 738746419303219200, "step": 192400 }, { "loss": 4.16125, "learning_rate": 8.519696152426686e-06, "epoch": 0.7744266225746366, "total_flos": 739128361355182080, "step": 192500 }, { "loss": 4.14375, "learning_rate": 8.518885027030754e-06, "epoch": 0.7748289221188313, "total_flos": 739512231388078080, "step": 192600 }, { "loss": 4.19375, "learning_rate": 8.518073901634824e-06, "epoch": 0.7752312216630258, "total_flos": 739910850740674560, "step": 192700 }, { "loss": 4.13375, "learning_rate": 8.517262776238894e-06, "epoch": 0.7756335212072205, "total_flos": 740297259547361280, "step": 192800 }, { "loss": 4.14875, "learning_rate": 8.516451650842962e-06, "epoch": 0.7760358207514151, "total_flos": 740672212004536320, "step": 192900 }, { "loss": 4.09375, "learning_rate": 8.515640525447032e-06, "epoch": 0.7764381202956097, "total_flos": 741061871291473920, "step": 193000 }, { "loss": 4.18125, "learning_rate": 8.5148294000511e-06, "epoch": 0.7768404198398043, "total_flos": 741454685456302080, "step": 193100 }, { "loss": 4.18125, "learning_rate": 8.51401827465517e-06, "epoch": 0.7772427193839989, "total_flos": 741846262101688320, "step": 193200 }, { "loss": 4.13375, "learning_rate": 8.513207149259241e-06, "epoch": 0.7776450189281936, "total_flos": 742234715736637440, "step": 193300 }, { "loss": 4.13125, "learning_rate": 8.51239602386331e-06, "epoch": 0.7780473184723882, "total_flos": 742607899550146560, "step": 193400 }, { "loss": 4.135, "learning_rate": 8.51158489846738e-06, "epoch": 0.7784496180165827, "total_flos": 742989236120494080, "step": 193500 }, { "loss": 4.13125, "learning_rate": 8.51077377307145e-06, "epoch": 0.7788519175607774, "total_flos": 743375820198174720, "step": 193600 }, { "loss": 4.17375, "learning_rate": 8.509962647675518e-06, "epoch": 0.779254217104972, "total_flos": 743755759911813120, "step": 193700 }, { "loss": 4.215, "learning_rate": 8.509151522279588e-06, "epoch": 0.7796565166491667, "total_flos": 744144978365644800, "step": 193800 }, { "loss": 4.12, "learning_rate": 8.508340396883656e-06, "epoch": 0.7800588161933613, "total_flos": 744527642746552320, "step": 193900 }, { "loss": 4.17875, "learning_rate": 8.507529271487726e-06, "epoch": 0.7804611157375558, "total_flos": 744905617300561920, "step": 194000 }, { "loss": 4.20625, "learning_rate": 8.506718146091796e-06, "epoch": 0.7808634152817505, "total_flos": 745285785397616640, "step": 194100 }, { "loss": 4.1875, "learning_rate": 8.505907020695864e-06, "epoch": 0.7812657148259451, "total_flos": 745672279184179200, "step": 194200 }, { "loss": 4.175, "learning_rate": 8.505095895299935e-06, "epoch": 0.7816680143701397, "total_flos": 746057365491548160, "step": 194300 }, { "loss": 4.125, "learning_rate": 8.504284769904005e-06, "epoch": 0.7820703139143343, "total_flos": 746441957853388800, "step": 194400 }, { "loss": 4.16, "learning_rate": 8.503473644508073e-06, "epoch": 0.7824726134585289, "total_flos": 746823644965724160, "step": 194500 }, { "loss": 4.14125, "learning_rate": 8.502662519112143e-06, "epoch": 0.7828749130027236, "total_flos": 747227825188945920, "step": 194600 }, { "loss": 4.08, "learning_rate": 8.501851393716211e-06, "epoch": 0.7832772125469182, "total_flos": 747613994989731840, "step": 194700 }, { "loss": 4.15875, "learning_rate": 8.501040268320281e-06, "epoch": 0.7836795120911128, "total_flos": 747982913875722240, "step": 194800 }, { "loss": 4.15125, "learning_rate": 8.500229142924351e-06, "epoch": 0.7840818116353074, "total_flos": 748360861873520640, "step": 194900 }, { "loss": 4.12875, "learning_rate": 8.49941801752842e-06, "epoch": 0.784484111179502, "total_flos": 748750266220830720, "step": 195000 }, { "loss": 4.14625, "learning_rate": 8.49860689213249e-06, "epoch": 0.7848864107236967, "total_flos": 749142119050813440, "step": 195100 }, { "loss": 4.15125, "learning_rate": 8.49779576673656e-06, "epoch": 0.7852887102678913, "total_flos": 749511377856307200, "step": 195200 }, { "loss": 4.11625, "learning_rate": 8.496984641340628e-06, "epoch": 0.7856910098120858, "total_flos": 749894796433612800, "step": 195300 }, { "loss": 4.13125, "learning_rate": 8.496173515944698e-06, "epoch": 0.7860933093562805, "total_flos": 750270577444577280, "step": 195400 }, { "loss": 4.095, "learning_rate": 8.495362390548768e-06, "epoch": 0.7864956089004751, "total_flos": 750634275379445760, "step": 195500 }, { "loss": 4.14125, "learning_rate": 8.494551265152837e-06, "epoch": 0.7868979084446698, "total_flos": 751012648276623360, "step": 195600 }, { "loss": 4.095, "learning_rate": 8.493740139756907e-06, "epoch": 0.7873002079888644, "total_flos": 751400570787348480, "step": 195700 }, { "loss": 4.1525, "learning_rate": 8.492929014360975e-06, "epoch": 0.7877025075330589, "total_flos": 751778890572103680, "step": 195800 }, { "loss": 4.1725, "learning_rate": 8.492117888965045e-06, "epoch": 0.7881048070772536, "total_flos": 752160482082078720, "step": 195900 }, { "loss": 4.16375, "learning_rate": 8.491306763569115e-06, "epoch": 0.7885071066214482, "total_flos": 752545228469944320, "step": 196000 }, { "loss": 4.1475, "learning_rate": 8.490495638173183e-06, "epoch": 0.7889094061656429, "total_flos": 752941165645209600, "step": 196100 }, { "loss": 4.115, "learning_rate": 8.489684512777253e-06, "epoch": 0.7893117057098374, "total_flos": 753325975767982080, "step": 196200 }, { "loss": 4.115, "learning_rate": 8.488873387381324e-06, "epoch": 0.789714005254032, "total_flos": 753699228627640320, "step": 196300 }, { "loss": 4.1675, "learning_rate": 8.488062261985392e-06, "epoch": 0.7901163047982267, "total_flos": 754082402887802880, "step": 196400 }, { "loss": 4.13125, "learning_rate": 8.487251136589462e-06, "epoch": 0.7905186043424213, "total_flos": 754475679130705920, "step": 196500 }, { "loss": 4.07, "learning_rate": 8.48644001119353e-06, "epoch": 0.7909209038866158, "total_flos": 754859469494968320, "step": 196600 }, { "loss": 4.135, "learning_rate": 8.4856288857976e-06, "epoch": 0.7913232034308105, "total_flos": 755244194637864960, "step": 196700 }, { "loss": 4.145, "learning_rate": 8.48481776040167e-06, "epoch": 0.7917255029750051, "total_flos": 755627469811630080, "step": 196800 }, { "loss": 4.06375, "learning_rate": 8.484006635005739e-06, "epoch": 0.7921278025191998, "total_flos": 756010575025643520, "step": 196900 }, { "loss": 4.1075, "learning_rate": 8.483195509609809e-06, "epoch": 0.7925301020633944, "total_flos": 756396420840652800, "step": 197000 }, { "loss": 4.10125, "learning_rate": 8.482384384213879e-06, "epoch": 0.7929324016075889, "total_flos": 756777587451248640, "step": 197100 }, { "loss": 4.08625, "learning_rate": 8.481573258817947e-06, "epoch": 0.7933347011517836, "total_flos": 757142050204999680, "step": 197200 }, { "loss": 4.11375, "learning_rate": 8.480762133422017e-06, "epoch": 0.7937370006959782, "total_flos": 757528554614046720, "step": 197300 }, { "loss": 4.07375, "learning_rate": 8.479951008026086e-06, "epoch": 0.7941393002401729, "total_flos": 757923668546764800, "step": 197400 }, { "loss": 4.13875, "learning_rate": 8.479139882630156e-06, "epoch": 0.7945415997843674, "total_flos": 758306035498106880, "step": 197500 }, { "loss": 4.17125, "learning_rate": 8.478328757234226e-06, "epoch": 0.794943899328562, "total_flos": 758686601938329600, "step": 197600 }, { "loss": 4.12, "learning_rate": 8.477517631838294e-06, "epoch": 0.7953461988727567, "total_flos": 759076394006323200, "step": 197700 }, { "loss": 4.0725, "learning_rate": 8.476706506442364e-06, "epoch": 0.7957484984169513, "total_flos": 759455069644308480, "step": 197800 }, { "loss": 4.11875, "learning_rate": 8.475895381046434e-06, "epoch": 0.796150797961146, "total_flos": 759819665179115520, "step": 197900 }, { "loss": 4.12875, "learning_rate": 8.475084255650502e-06, "epoch": 0.7965530975053405, "total_flos": 760188281324298240, "step": 198000 }, { "loss": 4.125, "learning_rate": 8.474273130254572e-06, "epoch": 0.7969553970495351, "total_flos": 760567482775265280, "step": 198100 }, { "loss": 4.1325, "learning_rate": 8.47346200485864e-06, "epoch": 0.7973576965937298, "total_flos": 760955001631580160, "step": 198200 }, { "loss": 4.095, "learning_rate": 8.47265087946271e-06, "epoch": 0.7977599961379244, "total_flos": 761352914588958720, "step": 198300 }, { "loss": 4.19125, "learning_rate": 8.471839754066781e-06, "epoch": 0.7981622956821189, "total_flos": 761742154287759360, "step": 198400 }, { "loss": 4.12625, "learning_rate": 8.47102862867085e-06, "epoch": 0.7985645952263136, "total_flos": 762124839913635840, "step": 198500 }, { "loss": 4.105, "learning_rate": 8.47021750327492e-06, "epoch": 0.7989668947705082, "total_flos": 762523788563251200, "step": 198600 }, { "loss": 4.15125, "learning_rate": 8.46940637787899e-06, "epoch": 0.7993691943147029, "total_flos": 762901959633223680, "step": 198700 }, { "loss": 4.10875, "learning_rate": 8.468595252483058e-06, "epoch": 0.7997714938588975, "total_flos": 763266172758589440, "step": 198800 }, { "loss": 4.1375, "learning_rate": 8.467784127087128e-06, "epoch": 0.800173793403092, "total_flos": 763658264594472960, "step": 198900 }, { "loss": 4.1, "learning_rate": 8.466973001691196e-06, "epoch": 0.8005760929472867, "total_flos": 764033009913200640, "step": 199000 }, { "loss": 4.135, "learning_rate": 8.466161876295266e-06, "epoch": 0.8009783924914813, "total_flos": 764412992116776960, "step": 199100 }, { "loss": 4.1125, "learning_rate": 8.465350750899336e-06, "epoch": 0.801380692035676, "total_flos": 764789766330040320, "step": 199200 }, { "loss": 4.1275, "learning_rate": 8.464539625503404e-06, "epoch": 0.8017829915798705, "total_flos": 765186909157294080, "step": 199300 }, { "loss": 4.125, "learning_rate": 8.463728500107475e-06, "epoch": 0.8021852911240651, "total_flos": 765569706319257600, "step": 199400 }, { "loss": 4.13875, "learning_rate": 8.462917374711545e-06, "epoch": 0.8025875906682598, "total_flos": 765948891836497920, "step": 199500 }, { "loss": 4.11375, "learning_rate": 8.462106249315613e-06, "epoch": 0.8029898902124544, "total_flos": 766332591909642240, "step": 199600 }, { "loss": 4.125, "learning_rate": 8.461295123919683e-06, "epoch": 0.8033921897566491, "total_flos": 766713636361666560, "step": 199700 }, { "loss": 4.12375, "learning_rate": 8.460483998523753e-06, "epoch": 0.8037944893008436, "total_flos": 767093363625615360, "step": 199800 }, { "loss": 4.02875, "learning_rate": 8.459672873127821e-06, "epoch": 0.8041967888450382, "total_flos": 767493666642001920, "step": 199900 }, { "loss": 4.085, "learning_rate": 8.458861747731891e-06, "epoch": 0.8045990883892329, "total_flos": 767882948830740480, "step": 200000 }, { "loss": 4.13375, "learning_rate": 8.45805062233596e-06, "epoch": 0.8050013879334275, "total_flos": 768254799522447360, "step": 200100 }, { "loss": 4.16375, "learning_rate": 8.457239496940031e-06, "epoch": 0.8054036874776221, "total_flos": 768626209381048320, "step": 200200 }, { "loss": 4.11125, "learning_rate": 8.4564283715441e-06, "epoch": 0.8058059870218167, "total_flos": 769003610320896000, "step": 200300 }, { "loss": 4.11625, "learning_rate": 8.455617246148168e-06, "epoch": 0.8062082865660113, "total_flos": 769401741039206400, "step": 200400 }, { "loss": 4.13875, "learning_rate": 8.454806120752238e-06, "epoch": 0.806610586110206, "total_flos": 769798522701987840, "step": 200500 }, { "loss": 4.14, "learning_rate": 8.453994995356308e-06, "epoch": 0.8070128856544005, "total_flos": 770199085969244160, "step": 200600 }, { "loss": 4.105, "learning_rate": 8.453183869960377e-06, "epoch": 0.8074151851985951, "total_flos": 770583471192637440, "step": 200700 }, { "loss": 4.065, "learning_rate": 8.452372744564447e-06, "epoch": 0.8078174847427898, "total_flos": 770955821141114880, "step": 200800 }, { "loss": 4.13875, "learning_rate": 8.451561619168515e-06, "epoch": 0.8082197842869844, "total_flos": 771363204043407360, "step": 200900 }, { "loss": 4.0875, "learning_rate": 8.450750493772587e-06, "epoch": 0.8086220838311791, "total_flos": 771749161394503680, "step": 201000 }, { "loss": 4.0975, "learning_rate": 8.449939368376655e-06, "epoch": 0.8090243833753736, "total_flos": 772149613125672960, "step": 201100 }, { "loss": 4.08125, "learning_rate": 8.449128242980723e-06, "epoch": 0.8094266829195682, "total_flos": 772545422831124480, "step": 201200 }, { "loss": 4.10625, "learning_rate": 8.448317117584793e-06, "epoch": 0.8098289824637629, "total_flos": 772934115471974400, "step": 201300 }, { "loss": 4.03625, "learning_rate": 8.447505992188864e-06, "epoch": 0.8102312820079575, "total_flos": 773318139530895360, "step": 201400 }, { "loss": 4.04625, "learning_rate": 8.446694866792934e-06, "epoch": 0.8106335815521521, "total_flos": 773702583177953280, "step": 201500 }, { "loss": 4.07625, "learning_rate": 8.445883741397002e-06, "epoch": 0.8110358810963467, "total_flos": 774083712609853440, "step": 201600 }, { "loss": 4.11625, "learning_rate": 8.44507261600107e-06, "epoch": 0.8114381806405413, "total_flos": 774482252293816320, "step": 201700 }, { "loss": 4.10375, "learning_rate": 8.444261490605142e-06, "epoch": 0.811840480184736, "total_flos": 774876298666844160, "step": 201800 }, { "loss": 4.0725, "learning_rate": 8.44345036520921e-06, "epoch": 0.8122427797289306, "total_flos": 775265915463843840, "step": 201900 }, { "loss": 4.1075, "learning_rate": 8.442639239813279e-06, "epoch": 0.8126450792731252, "total_flos": 775643321714933760, "step": 202000 }, { "loss": 4.1025, "learning_rate": 8.441828114417349e-06, "epoch": 0.8130473788173198, "total_flos": 776017870517698560, "step": 202100 }, { "loss": 4.08375, "learning_rate": 8.441016989021419e-06, "epoch": 0.8134496783615144, "total_flos": 776393826799656960, "step": 202200 }, { "loss": 4.13375, "learning_rate": 8.440205863625489e-06, "epoch": 0.8138519779057091, "total_flos": 776775970678824960, "step": 202300 }, { "loss": 4.10875, "learning_rate": 8.439394738229557e-06, "epoch": 0.8142542774499036, "total_flos": 777167451721850880, "step": 202400 }, { "loss": 4.0675, "learning_rate": 8.438583612833626e-06, "epoch": 0.8146565769940982, "total_flos": 777565354056744960, "step": 202500 }, { "loss": 4.07625, "learning_rate": 8.437772487437697e-06, "epoch": 0.8150588765382929, "total_flos": 777920729275023360, "step": 202600 }, { "loss": 4.075, "learning_rate": 8.436961362041766e-06, "epoch": 0.8154611760824875, "total_flos": 778314308258734080, "step": 202700 }, { "loss": 4.06625, "learning_rate": 8.436150236645834e-06, "epoch": 0.8158634756266822, "total_flos": 778696877037281280, "step": 202800 }, { "loss": 4.06, "learning_rate": 8.435339111249904e-06, "epoch": 0.8162657751708767, "total_flos": 779092479604285440, "step": 202900 }, { "loss": 4.13125, "learning_rate": 8.434527985853974e-06, "epoch": 0.8166680747150713, "total_flos": 779462556341084160, "step": 203000 }, { "loss": 4.03375, "learning_rate": 8.433716860458044e-06, "epoch": 0.817070374259266, "total_flos": 779858907793244160, "step": 203100 }, { "loss": 4.0475, "learning_rate": 8.432905735062112e-06, "epoch": 0.8174726738034606, "total_flos": 780238316382658560, "step": 203200 }, { "loss": 4.1025, "learning_rate": 8.43209460966618e-06, "epoch": 0.8178749733476552, "total_flos": 780618457923502080, "step": 203300 }, { "loss": 4.05375, "learning_rate": 8.431283484270253e-06, "epoch": 0.8182772728918498, "total_flos": 781016105318768640, "step": 203400 }, { "loss": 4.09, "learning_rate": 8.430472358874321e-06, "epoch": 0.8186795724360444, "total_flos": 781402615039057920, "step": 203500 }, { "loss": 4.10875, "learning_rate": 8.42966123347839e-06, "epoch": 0.8190818719802391, "total_flos": 781794834344755200, "step": 203600 }, { "loss": 4.065, "learning_rate": 8.42885010808246e-06, "epoch": 0.8194841715244336, "total_flos": 782169993940377600, "step": 203700 }, { "loss": 4.1, "learning_rate": 8.42803898268653e-06, "epoch": 0.8198864710686283, "total_flos": 782565336256512000, "step": 203800 }, { "loss": 4.07875, "learning_rate": 8.4272278572906e-06, "epoch": 0.8202887706128229, "total_flos": 782950799662080000, "step": 203900 }, { "loss": 4.0175, "learning_rate": 8.426416731894668e-06, "epoch": 0.8206910701570175, "total_flos": 783332906362552320, "step": 204000 }, { "loss": 4.05625, "learning_rate": 8.425605606498736e-06, "epoch": 0.8210933697012122, "total_flos": 783705936150036480, "step": 204100 }, { "loss": 4.08875, "learning_rate": 8.424794481102808e-06, "epoch": 0.8214956692454067, "total_flos": 784102850593873920, "step": 204200 }, { "loss": 4.065, "learning_rate": 8.423983355706876e-06, "epoch": 0.8218979687896013, "total_flos": 784504757605416960, "step": 204300 }, { "loss": 4.07875, "learning_rate": 8.423172230310946e-06, "epoch": 0.822300268333796, "total_flos": 784887161735454720, "step": 204400 }, { "loss": 4.08125, "learning_rate": 8.422361104915016e-06, "epoch": 0.8227025678779906, "total_flos": 785265725837352960, "step": 204500 }, { "loss": 4.08625, "learning_rate": 8.421549979519085e-06, "epoch": 0.8231048674221852, "total_flos": 785638304169246720, "step": 204600 }, { "loss": 4.0675, "learning_rate": 8.420738854123155e-06, "epoch": 0.8235071669663798, "total_flos": 786013453142384640, "step": 204700 }, { "loss": 4.105, "learning_rate": 8.419927728727223e-06, "epoch": 0.8239094665105744, "total_flos": 786388644605460480, "step": 204800 }, { "loss": 4.055, "learning_rate": 8.419116603331293e-06, "epoch": 0.8243117660547691, "total_flos": 786758158350581760, "step": 204900 }, { "loss": 4.0825, "learning_rate": 8.418305477935363e-06, "epoch": 0.8247140655989637, "total_flos": 787164457759457280, "step": 205000 }, { "loss": 4.04625, "learning_rate": 8.417494352539431e-06, "epoch": 0.8251163651431583, "total_flos": 787552321846517760, "step": 205100 }, { "loss": 4.0775, "learning_rate": 8.416683227143501e-06, "epoch": 0.8255186646873529, "total_flos": 787935740423823360, "step": 205200 }, { "loss": 4.05125, "learning_rate": 8.415872101747571e-06, "epoch": 0.8259209642315475, "total_flos": 788313475971932160, "step": 205300 }, { "loss": 4.06, "learning_rate": 8.41506097635164e-06, "epoch": 0.8263232637757422, "total_flos": 788692284390973440, "step": 205400 }, { "loss": 4.02625, "learning_rate": 8.41424985095571e-06, "epoch": 0.8267255633199367, "total_flos": 789056683409817600, "step": 205500 }, { "loss": 4.10375, "learning_rate": 8.413438725559778e-06, "epoch": 0.8271278628641314, "total_flos": 789430100917985280, "step": 205600 }, { "loss": 4.0275, "learning_rate": 8.412627600163848e-06, "epoch": 0.827530162408326, "total_flos": 789796714724843520, "step": 205700 }, { "loss": 4.03, "learning_rate": 8.411816474767918e-06, "epoch": 0.8279324619525206, "total_flos": 790173069349969920, "step": 205800 }, { "loss": 4.06625, "learning_rate": 8.411005349371987e-06, "epoch": 0.8283347614967153, "total_flos": 790547841224908800, "step": 205900 }, { "loss": 4.14625, "learning_rate": 8.410194223976057e-06, "epoch": 0.8287370610409098, "total_flos": 790937505823088640, "step": 206000 }, { "loss": 4.085, "learning_rate": 8.409383098580127e-06, "epoch": 0.8291393605851045, "total_flos": 791309930128957440, "step": 206100 }, { "loss": 4.10625, "learning_rate": 8.408571973184195e-06, "epoch": 0.8295416601292991, "total_flos": 791696344246886400, "step": 206200 }, { "loss": 4.0525, "learning_rate": 8.407760847788265e-06, "epoch": 0.8299439596734937, "total_flos": 792089705469665280, "step": 206300 }, { "loss": 4.08625, "learning_rate": 8.406949722392333e-06, "epoch": 0.8303462592176883, "total_flos": 792477983833620480, "step": 206400 }, { "loss": 4.05125, "learning_rate": 8.406138596996404e-06, "epoch": 0.8307485587618829, "total_flos": 792856840053841920, "step": 206500 }, { "loss": 4.0725, "learning_rate": 8.405327471600474e-06, "epoch": 0.8311508583060775, "total_flos": 793240518882017280, "step": 206600 }, { "loss": 4.0425, "learning_rate": 8.404516346204542e-06, "epoch": 0.8315531578502722, "total_flos": 793634023508336640, "step": 206700 }, { "loss": 4.06125, "learning_rate": 8.403705220808612e-06, "epoch": 0.8319554573944667, "total_flos": 794017569555456000, "step": 206800 }, { "loss": 4.03, "learning_rate": 8.402894095412682e-06, "epoch": 0.8323577569386614, "total_flos": 794397711096299520, "step": 206900 }, { "loss": 4.05875, "learning_rate": 8.40208297001675e-06, "epoch": 0.832760056482856, "total_flos": 794781873247518720, "step": 207000 }, { "loss": 4.04875, "learning_rate": 8.40127184462082e-06, "epoch": 0.8331623560270506, "total_flos": 795158551858421760, "step": 207100 }, { "loss": 4.0625, "learning_rate": 8.400460719224889e-06, "epoch": 0.8335646555712453, "total_flos": 795545900754984960, "step": 207200 }, { "loss": 4.03875, "learning_rate": 8.399649593828959e-06, "epoch": 0.8339669551154398, "total_flos": 795921793302036480, "step": 207300 }, { "loss": 4.06625, "learning_rate": 8.398838468433029e-06, "epoch": 0.8343692546596345, "total_flos": 796323540976312320, "step": 207400 }, { "loss": 4.0775, "learning_rate": 8.398027343037097e-06, "epoch": 0.8347715542038291, "total_flos": 796711660003000320, "step": 207500 }, { "loss": 4.09625, "learning_rate": 8.397216217641167e-06, "epoch": 0.8351738537480237, "total_flos": 797092789434900480, "step": 207600 }, { "loss": 4.055, "learning_rate": 8.396405092245237e-06, "epoch": 0.8355761532922183, "total_flos": 797473058445557760, "step": 207700 }, { "loss": 4.115, "learning_rate": 8.395593966849306e-06, "epoch": 0.8359784528364129, "total_flos": 797854862405222400, "step": 207800 }, { "loss": 4.07625, "learning_rate": 8.394782841453376e-06, "epoch": 0.8363807523806076, "total_flos": 798239895600168960, "step": 207900 }, { "loss": 4.06125, "learning_rate": 8.393971716057444e-06, "epoch": 0.8367830519248022, "total_flos": 798628694465863680, "step": 208000 }, { "loss": 4.02625, "learning_rate": 8.393160590661514e-06, "epoch": 0.8371853514689968, "total_flos": 799007678155898880, "step": 208100 }, { "loss": 4.0575, "learning_rate": 8.392349465265584e-06, "epoch": 0.8375876510131914, "total_flos": 799397146238115840, "step": 208200 }, { "loss": 4.07125, "learning_rate": 8.391538339869652e-06, "epoch": 0.837989950557386, "total_flos": 799785573316853760, "step": 208300 }, { "loss": 4.08125, "learning_rate": 8.390727214473722e-06, "epoch": 0.8383922501015806, "total_flos": 800176613526773760, "step": 208400 }, { "loss": 4.07875, "learning_rate": 8.389916089077793e-06, "epoch": 0.8387945496457753, "total_flos": 800550599337861120, "step": 208500 }, { "loss": 4.05, "learning_rate": 8.389104963681861e-06, "epoch": 0.8391968491899698, "total_flos": 800941458965544960, "step": 208600 }, { "loss": 4.055, "learning_rate": 8.388293838285931e-06, "epoch": 0.8395991487341645, "total_flos": 801319539744399360, "step": 208700 }, { "loss": 4.0375, "learning_rate": 8.38748271289e-06, "epoch": 0.8400014482783591, "total_flos": 801686010147717120, "step": 208800 }, { "loss": 4.00125, "learning_rate": 8.38667158749407e-06, "epoch": 0.8404037478225537, "total_flos": 802083800946524160, "step": 208900 }, { "loss": 4.02625, "learning_rate": 8.38586046209814e-06, "epoch": 0.8408060473667484, "total_flos": 802471351670292480, "step": 209000 }, { "loss": 4.0125, "learning_rate": 8.385049336702208e-06, "epoch": 0.8412083469109429, "total_flos": 802847148614983680, "step": 209100 }, { "loss": 4.01875, "learning_rate": 8.384238211306278e-06, "epoch": 0.8416106464551376, "total_flos": 803227444181852160, "step": 209200 }, { "loss": 4.055, "learning_rate": 8.383427085910348e-06, "epoch": 0.8420129459993322, "total_flos": 803618224140902400, "step": 209300 }, { "loss": 4.0175, "learning_rate": 8.382615960514416e-06, "epoch": 0.8424152455435268, "total_flos": 804026632112947200, "step": 209400 }, { "loss": 4.0675, "learning_rate": 8.381804835118486e-06, "epoch": 0.8428175450877214, "total_flos": 804407851835965440, "step": 209500 }, { "loss": 4.0225, "learning_rate": 8.380993709722556e-06, "epoch": 0.843219844631916, "total_flos": 804797149958430720, "step": 209600 }, { "loss": 4.00375, "learning_rate": 8.380182584326625e-06, "epoch": 0.8436221441761107, "total_flos": 805168501393367040, "step": 209700 }, { "loss": 4.0325, "learning_rate": 8.379371458930695e-06, "epoch": 0.8440244437203053, "total_flos": 805534185732833280, "step": 209800 }, { "loss": 4.0275, "learning_rate": 8.378560333534763e-06, "epoch": 0.8444267432644998, "total_flos": 805900826095902720, "step": 209900 }, { "loss": 4.03625, "learning_rate": 8.377749208138833e-06, "epoch": 0.8448290428086945, "total_flos": 806284658950103040, "step": 210000 }, { "loss": 4.03625, "learning_rate": 8.376938082742903e-06, "epoch": 0.8452313423528891, "total_flos": 806662845953802240, "step": 210100 }, { "loss": 4.05625, "learning_rate": 8.376126957346971e-06, "epoch": 0.8456336418970837, "total_flos": 807037235419299840, "step": 210200 }, { "loss": 4.0125, "learning_rate": 8.375315831951041e-06, "epoch": 0.8460359414412784, "total_flos": 807443938482585600, "step": 210300 }, { "loss": 4.0675, "learning_rate": 8.374504706555111e-06, "epoch": 0.8464382409854729, "total_flos": 807835286744555520, "step": 210400 }, { "loss": 4.08125, "learning_rate": 8.37369358115918e-06, "epoch": 0.8468405405296676, "total_flos": 808215911608442880, "step": 210500 }, { "loss": 4.0425, "learning_rate": 8.37288245576325e-06, "epoch": 0.8472428400738622, "total_flos": 808596424936243200, "step": 210600 }, { "loss": 4.0075, "learning_rate": 8.372071330367318e-06, "epoch": 0.8476451396180568, "total_flos": 808968854553354240, "step": 210700 }, { "loss": 4.03, "learning_rate": 8.371260204971388e-06, "epoch": 0.8480474391622514, "total_flos": 809376720778690560, "step": 210800 }, { "loss": 4.005, "learning_rate": 8.370449079575458e-06, "epoch": 0.848449738706446, "total_flos": 809776466114641920, "step": 210900 }, { "loss": 4.00375, "learning_rate": 8.369637954179527e-06, "epoch": 0.8488520382506407, "total_flos": 810160660133314560, "step": 211000 }, { "loss": 4.08375, "learning_rate": 8.368826828783597e-06, "epoch": 0.8492543377948353, "total_flos": 810551084239134720, "step": 211100 }, { "loss": 4.0275, "learning_rate": 8.368015703387667e-06, "epoch": 0.8496566373390299, "total_flos": 810927401685565440, "step": 211200 }, { "loss": 3.9575, "learning_rate": 8.367204577991735e-06, "epoch": 0.8500589368832245, "total_flos": 811299077106278400, "step": 211300 }, { "loss": 4.02125, "learning_rate": 8.366393452595805e-06, "epoch": 0.8504612364274191, "total_flos": 811695109883904000, "step": 211400 }, { "loss": 4.03125, "learning_rate": 8.365582327199873e-06, "epoch": 0.8508635359716138, "total_flos": 812072707339714560, "step": 211500 }, { "loss": 4.09375, "learning_rate": 8.364771201803944e-06, "epoch": 0.8512658355158084, "total_flos": 812456396790374400, "step": 211600 }, { "loss": 4.0525, "learning_rate": 8.363960076408014e-06, "epoch": 0.8516681350600029, "total_flos": 812852992559677440, "step": 211700 }, { "loss": 4.0525, "learning_rate": 8.363148951012082e-06, "epoch": 0.8520704346041976, "total_flos": 813224848562626560, "step": 211800 }, { "loss": 4.035, "learning_rate": 8.362337825616152e-06, "epoch": 0.8524727341483922, "total_flos": 813624025595658240, "step": 211900 }, { "loss": 4.045, "learning_rate": 8.361526700220222e-06, "epoch": 0.8528750336925869, "total_flos": 814015193275392000, "step": 212000 }, { "loss": 4.04375, "learning_rate": 8.36071557482429e-06, "epoch": 0.8532773332367815, "total_flos": 814412213944074240, "step": 212100 }, { "loss": 4.0375, "learning_rate": 8.35990444942836e-06, "epoch": 0.853679632780976, "total_flos": 814784595760005120, "step": 212200 }, { "loss": 4.01375, "learning_rate": 8.359093324032429e-06, "epoch": 0.8540819323251707, "total_flos": 815179295415828480, "step": 212300 }, { "loss": 4.00625, "learning_rate": 8.358282198636499e-06, "epoch": 0.8544842318693653, "total_flos": 815575121055006720, "step": 212400 }, { "loss": 4.02, "learning_rate": 8.357471073240569e-06, "epoch": 0.8548865314135599, "total_flos": 815965555783311360, "step": 212500 }, { "loss": 4.02, "learning_rate": 8.356659947844637e-06, "epoch": 0.8552888309577545, "total_flos": 816341358039244800, "step": 212600 }, { "loss": 3.99875, "learning_rate": 8.355848822448707e-06, "epoch": 0.8556911305019491, "total_flos": 816729030921584640, "step": 212700 }, { "loss": 4.04125, "learning_rate": 8.355037697052777e-06, "epoch": 0.8560934300461438, "total_flos": 817100796633415680, "step": 212800 }, { "loss": 3.99375, "learning_rate": 8.354226571656846e-06, "epoch": 0.8564957295903384, "total_flos": 817500940312535040, "step": 212900 }, { "loss": 4.03375, "learning_rate": 8.353415446260916e-06, "epoch": 0.856898029134533, "total_flos": 817894455561338880, "step": 213000 }, { "loss": 3.97625, "learning_rate": 8.352604320864984e-06, "epoch": 0.8573003286787276, "total_flos": 818280242952683520, "step": 213100 }, { "loss": 4.09, "learning_rate": 8.351793195469054e-06, "epoch": 0.8577026282229222, "total_flos": 818645709531217920, "step": 213200 }, { "loss": 3.99625, "learning_rate": 8.350982070073124e-06, "epoch": 0.8581049277671169, "total_flos": 819021357761126400, "step": 213300 }, { "loss": 4.0675, "learning_rate": 8.350170944677192e-06, "epoch": 0.8585072273113115, "total_flos": 819413640801730560, "step": 213400 }, { "loss": 4.02375, "learning_rate": 8.349359819281262e-06, "epoch": 0.858909526855506, "total_flos": 819809870095319040, "step": 213500 }, { "loss": 3.965, "learning_rate": 8.348548693885333e-06, "epoch": 0.8593118263997007, "total_flos": 820183553165598720, "step": 213600 }, { "loss": 3.98125, "learning_rate": 8.347737568489401e-06, "epoch": 0.8597141259438953, "total_flos": 820573578928250880, "step": 213700 }, { "loss": 4.00125, "learning_rate": 8.346926443093471e-06, "epoch": 0.86011642548809, "total_flos": 820956960326860800, "step": 213800 }, { "loss": 4.065, "learning_rate": 8.346115317697541e-06, "epoch": 0.8605187250322845, "total_flos": 821341122478080000, "step": 213900 }, { "loss": 4.045, "learning_rate": 8.34530419230161e-06, "epoch": 0.8609210245764791, "total_flos": 821715522566062080, "step": 214000 }, { "loss": 4.01875, "learning_rate": 8.34449306690568e-06, "epoch": 0.8613233241206738, "total_flos": 822107311661137920, "step": 214100 }, { "loss": 3.96625, "learning_rate": 8.343681941509748e-06, "epoch": 0.8617256236648684, "total_flos": 822486699005583360, "step": 214200 }, { "loss": 3.98375, "learning_rate": 8.342870816113818e-06, "epoch": 0.862127923209063, "total_flos": 822864620447170560, "step": 214300 }, { "loss": 4.00125, "learning_rate": 8.342059690717888e-06, "epoch": 0.8625302227532576, "total_flos": 823264833172439040, "step": 214400 }, { "loss": 4.06, "learning_rate": 8.341248565321956e-06, "epoch": 0.8629325222974522, "total_flos": 823661466120437760, "step": 214500 }, { "loss": 4.06625, "learning_rate": 8.340437439926026e-06, "epoch": 0.8633348218416469, "total_flos": 824051566240481280, "step": 214600 }, { "loss": 4.02125, "learning_rate": 8.339626314530096e-06, "epoch": 0.8637371213858415, "total_flos": 824429068093931520, "step": 214700 }, { "loss": 4.0375, "learning_rate": 8.338815189134165e-06, "epoch": 0.864139420930036, "total_flos": 824805289938001920, "step": 214800 }, { "loss": 4.01875, "learning_rate": 8.338004063738235e-06, "epoch": 0.8645417204742307, "total_flos": 825193701083013120, "step": 214900 }, { "loss": 3.97, "learning_rate": 8.337192938342303e-06, "epoch": 0.8649440200184253, "total_flos": 825560601696952320, "step": 215000 }, { "loss": 4.00375, "learning_rate": 8.336381812946373e-06, "epoch": 0.86534631956262, "total_flos": 825945151568855040, "step": 215100 }, { "loss": 4.0575, "learning_rate": 8.335570687550443e-06, "epoch": 0.8657486191068146, "total_flos": 826332707603865600, "step": 215200 }, { "loss": 3.95625, "learning_rate": 8.334759562154511e-06, "epoch": 0.8661509186510091, "total_flos": 826729223704535040, "step": 215300 }, { "loss": 4.01625, "learning_rate": 8.333948436758581e-06, "epoch": 0.8665532181952038, "total_flos": 827119833703833600, "step": 215400 }, { "loss": 3.99125, "learning_rate": 8.333137311362651e-06, "epoch": 0.8669555177393984, "total_flos": 827518336209100800, "step": 215500 }, { "loss": 3.93875, "learning_rate": 8.33232618596672e-06, "epoch": 0.8673578172835931, "total_flos": 827900124235038720, "step": 215600 }, { "loss": 4.0, "learning_rate": 8.33151506057079e-06, "epoch": 0.8677601168277876, "total_flos": 828277668578426880, "step": 215700 }, { "loss": 4.01625, "learning_rate": 8.330703935174858e-06, "epoch": 0.8681624163719822, "total_flos": 828644903800627200, "step": 215800 }, { "loss": 4.0, "learning_rate": 8.329892809778928e-06, "epoch": 0.8685647159161769, "total_flos": 829013270317424640, "step": 215900 }, { "loss": 3.98, "learning_rate": 8.329081684382998e-06, "epoch": 0.8689670154603715, "total_flos": 829387118036213760, "step": 216000 }, { "loss": 3.96625, "learning_rate": 8.328270558987067e-06, "epoch": 0.869369315004566, "total_flos": 829760822351462400, "step": 216100 }, { "loss": 3.985, "learning_rate": 8.327459433591137e-06, "epoch": 0.8697716145487607, "total_flos": 830136815812116480, "step": 216200 }, { "loss": 3.99625, "learning_rate": 8.326648308195207e-06, "epoch": 0.8701739140929553, "total_flos": 830520871738490880, "step": 216300 }, { "loss": 3.97375, "learning_rate": 8.325837182799275e-06, "epoch": 0.87057621363715, "total_flos": 830907567352258560, "step": 216400 }, { "loss": 4.03375, "learning_rate": 8.325026057403345e-06, "epoch": 0.8709785131813446, "total_flos": 831303265521623040, "step": 216500 }, { "loss": 3.9825, "learning_rate": 8.324214932007413e-06, "epoch": 0.8713808127255391, "total_flos": 831673496284446720, "step": 216600 }, { "loss": 3.9625, "learning_rate": 8.323403806611484e-06, "epoch": 0.8717831122697338, "total_flos": 832055650786099200, "step": 216700 }, { "loss": 4.025, "learning_rate": 8.322592681215554e-06, "epoch": 0.8721854118139284, "total_flos": 832438633841541120, "step": 216800 }, { "loss": 4.0225, "learning_rate": 8.321781555819622e-06, "epoch": 0.8725877113581231, "total_flos": 832830667253760000, "step": 216900 }, { "loss": 3.9825, "learning_rate": 8.320970430423692e-06, "epoch": 0.8729900109023176, "total_flos": 833212184406343680, "step": 217000 }, { "loss": 3.9875, "learning_rate": 8.320159305027762e-06, "epoch": 0.8733923104465122, "total_flos": 833604185951109120, "step": 217100 }, { "loss": 4.0125, "learning_rate": 8.31934817963183e-06, "epoch": 0.8737946099907069, "total_flos": 833981847141826560, "step": 217200 }, { "loss": 3.99125, "learning_rate": 8.3185370542359e-06, "epoch": 0.8741969095349015, "total_flos": 834390600344616960, "step": 217300 }, { "loss": 3.95375, "learning_rate": 8.317725928839969e-06, "epoch": 0.8745992090790962, "total_flos": 834757362866257920, "step": 217400 }, { "loss": 3.98, "learning_rate": 8.316914803444039e-06, "epoch": 0.8750015086232907, "total_flos": 835149295364874240, "step": 217500 }, { "loss": 3.95875, "learning_rate": 8.316103678048109e-06, "epoch": 0.8754038081674853, "total_flos": 835533218510192640, "step": 217600 }, { "loss": 4.0675, "learning_rate": 8.315292552652177e-06, "epoch": 0.87580610771168, "total_flos": 835921560609054720, "step": 217700 }, { "loss": 3.9825, "learning_rate": 8.314481427256247e-06, "epoch": 0.8762084072558746, "total_flos": 836295068408340480, "step": 217800 }, { "loss": 3.995, "learning_rate": 8.313670301860317e-06, "epoch": 0.8766107068000693, "total_flos": 836674800983531520, "step": 217900 }, { "loss": 3.97375, "learning_rate": 8.312859176464386e-06, "epoch": 0.8770130063442638, "total_flos": 837057635324190720, "step": 218000 }, { "loss": 3.97875, "learning_rate": 8.312048051068456e-06, "epoch": 0.8774153058884584, "total_flos": 837448319680880640, "step": 218100 }, { "loss": 3.93125, "learning_rate": 8.311236925672524e-06, "epoch": 0.8778176054326531, "total_flos": 837840974508441600, "step": 218200 }, { "loss": 4.01125, "learning_rate": 8.310425800276594e-06, "epoch": 0.8782199049768477, "total_flos": 838231850069852160, "step": 218300 }, { "loss": 4.0075, "learning_rate": 8.309614674880664e-06, "epoch": 0.8786222045210422, "total_flos": 838607710749450240, "step": 218400 }, { "loss": 3.94375, "learning_rate": 8.308803549484732e-06, "epoch": 0.8790245040652369, "total_flos": 838994995911106560, "step": 218500 }, { "loss": 3.985, "learning_rate": 8.307992424088802e-06, "epoch": 0.8794268036094315, "total_flos": 839395410463580160, "step": 218600 }, { "loss": 3.9975, "learning_rate": 8.307181298692873e-06, "epoch": 0.8798291031536262, "total_flos": 839766953103237120, "step": 218700 }, { "loss": 3.94625, "learning_rate": 8.306370173296941e-06, "epoch": 0.8802314026978207, "total_flos": 840159809758003200, "step": 218800 }, { "loss": 4.04875, "learning_rate": 8.305559047901011e-06, "epoch": 0.8806337022420153, "total_flos": 840538804070522880, "step": 218900 }, { "loss": 3.98, "learning_rate": 8.304747922505081e-06, "epoch": 0.88103600178621, "total_flos": 840915572972544000, "step": 219000 }, { "loss": 3.98875, "learning_rate": 8.30393679710915e-06, "epoch": 0.8814383013304046, "total_flos": 841296208458915840, "step": 219100 }, { "loss": 4.0125, "learning_rate": 8.30312567171322e-06, "epoch": 0.8818406008745993, "total_flos": 841680992025477120, "step": 219200 }, { "loss": 3.98, "learning_rate": 8.302314546317288e-06, "epoch": 0.8822429004187938, "total_flos": 842060251900108800, "step": 219300 }, { "loss": 3.985, "learning_rate": 8.301503420921358e-06, "epoch": 0.8826451999629884, "total_flos": 842459662627799040, "step": 219400 }, { "loss": 3.96375, "learning_rate": 8.300692295525428e-06, "epoch": 0.8830474995071831, "total_flos": 842851504835297280, "step": 219500 }, { "loss": 3.9675, "learning_rate": 8.299881170129496e-06, "epoch": 0.8834497990513777, "total_flos": 843229713083965440, "step": 219600 }, { "loss": 3.985, "learning_rate": 8.299070044733566e-06, "epoch": 0.8838520985955723, "total_flos": 843599040935608320, "step": 219700 }, { "loss": 3.9925, "learning_rate": 8.298258919337636e-06, "epoch": 0.8842543981397669, "total_flos": 843974168663777280, "step": 219800 }, { "loss": 3.97875, "learning_rate": 8.297447793941705e-06, "epoch": 0.8846566976839615, "total_flos": 844358532642201600, "step": 219900 }, { "loss": 3.94125, "learning_rate": 8.296636668545775e-06, "epoch": 0.8850589972281562, "total_flos": 844745557552988160, "step": 220000 }, { "loss": 3.9675, "learning_rate": 8.295825543149843e-06, "epoch": 0.8854612967723507, "total_flos": 845123957006376960, "step": 220100 }, { "loss": 4.00375, "learning_rate": 8.295014417753913e-06, "epoch": 0.8858635963165453, "total_flos": 845510307389399040, "step": 220200 }, { "loss": 3.94375, "learning_rate": 8.294203292357983e-06, "epoch": 0.88626589586074, "total_flos": 845888759955210240, "step": 220300 }, { "loss": 3.94875, "learning_rate": 8.293392166962051e-06, "epoch": 0.8866681954049346, "total_flos": 846285748756439040, "step": 220400 }, { "loss": 3.9375, "learning_rate": 8.292581041566121e-06, "epoch": 0.8870704949491293, "total_flos": 846668747745607680, "step": 220500 }, { "loss": 4.0225, "learning_rate": 8.291769916170191e-06, "epoch": 0.8874727944933238, "total_flos": 847077145095168000, "step": 220600 }, { "loss": 3.93, "learning_rate": 8.29095879077426e-06, "epoch": 0.8878750940375184, "total_flos": 847469964571238400, "step": 220700 }, { "loss": 3.9575, "learning_rate": 8.29014766537833e-06, "epoch": 0.8882773935817131, "total_flos": 847851433922641920, "step": 220800 }, { "loss": 3.9075, "learning_rate": 8.289336539982398e-06, "epoch": 0.8886796931259077, "total_flos": 848240795780014080, "step": 220900 }, { "loss": 3.95875, "learning_rate": 8.288525414586468e-06, "epoch": 0.8890819926701024, "total_flos": 848620390262906880, "step": 221000 }, { "loss": 3.97125, "learning_rate": 8.287714289190538e-06, "epoch": 0.8894842922142969, "total_flos": 849003314894684160, "step": 221100 }, { "loss": 3.945, "learning_rate": 8.286903163794607e-06, "epoch": 0.8898865917584915, "total_flos": 849393303478640640, "step": 221200 }, { "loss": 3.93875, "learning_rate": 8.286092038398677e-06, "epoch": 0.8902888913026862, "total_flos": 849772340281098240, "step": 221300 }, { "loss": 4.005, "learning_rate": 8.285280913002747e-06, "epoch": 0.8906911908468808, "total_flos": 850162907790458880, "step": 221400 }, { "loss": 3.95125, "learning_rate": 8.284469787606815e-06, "epoch": 0.8910934903910754, "total_flos": 850538486974218240, "step": 221500 }, { "loss": 3.93, "learning_rate": 8.283658662210885e-06, "epoch": 0.89149578993527, "total_flos": 850924911714631680, "step": 221600 }, { "loss": 3.955, "learning_rate": 8.282847536814953e-06, "epoch": 0.8918980894794646, "total_flos": 851299672967086080, "step": 221700 }, { "loss": 3.9475, "learning_rate": 8.282036411419025e-06, "epoch": 0.8923003890236593, "total_flos": 851675942612336640, "step": 221800 }, { "loss": 3.9775, "learning_rate": 8.281225286023094e-06, "epoch": 0.8927026885678538, "total_flos": 852055999173304320, "step": 221900 }, { "loss": 3.9875, "learning_rate": 8.280414160627162e-06, "epoch": 0.8931049881120484, "total_flos": 852440841163530240, "step": 222000 }, { "loss": 3.9275, "learning_rate": 8.279603035231232e-06, "epoch": 0.8935072876562431, "total_flos": 852823664881704960, "step": 222100 }, { "loss": 3.92125, "learning_rate": 8.278791909835302e-06, "epoch": 0.8939095872004377, "total_flos": 853207513669632000, "step": 222200 }, { "loss": 3.89375, "learning_rate": 8.27798078443937e-06, "epoch": 0.8943118867446324, "total_flos": 853597300426383360, "step": 222300 }, { "loss": 3.9475, "learning_rate": 8.27716965904344e-06, "epoch": 0.8947141862888269, "total_flos": 853974281778094080, "step": 222400 }, { "loss": 4.00625, "learning_rate": 8.276358533647509e-06, "epoch": 0.8951164858330215, "total_flos": 854350971011481600, "step": 222500 }, { "loss": 3.9475, "learning_rate": 8.27554740825158e-06, "epoch": 0.8955187853772162, "total_flos": 854751868886999040, "step": 222600 }, { "loss": 3.95, "learning_rate": 8.274736282855649e-06, "epoch": 0.8959210849214108, "total_flos": 855131840468090880, "step": 222700 }, { "loss": 3.9525, "learning_rate": 8.273925157459717e-06, "epoch": 0.8963233844656054, "total_flos": 855520049785896960, "step": 222800 }, { "loss": 3.94625, "learning_rate": 8.273114032063787e-06, "epoch": 0.8967256840098, "total_flos": 855904700571402240, "step": 222900 }, { "loss": 3.9125, "learning_rate": 8.272302906667857e-06, "epoch": 0.8971279835539946, "total_flos": 856288336909639680, "step": 223000 }, { "loss": 3.9825, "learning_rate": 8.271491781271926e-06, "epoch": 0.8975302830981893, "total_flos": 856670220537937920, "step": 223100 }, { "loss": 3.91, "learning_rate": 8.270680655875996e-06, "epoch": 0.8979325826423838, "total_flos": 857050691375800320, "step": 223200 }, { "loss": 3.95375, "learning_rate": 8.269869530480064e-06, "epoch": 0.8983348821865785, "total_flos": 857428156050554880, "step": 223300 }, { "loss": 3.955, "learning_rate": 8.269058405084136e-06, "epoch": 0.8987371817307731, "total_flos": 857820709964513280, "step": 223400 }, { "loss": 3.94, "learning_rate": 8.268247279688204e-06, "epoch": 0.8991394812749677, "total_flos": 858213476328161280, "step": 223500 }, { "loss": 3.96625, "learning_rate": 8.267436154292272e-06, "epoch": 0.8995417808191624, "total_flos": 858603831387832320, "step": 223600 }, { "loss": 3.94875, "learning_rate": 8.266625028896344e-06, "epoch": 0.8999440803633569, "total_flos": 858993729680670720, "step": 223700 }, { "loss": 3.9825, "learning_rate": 8.265813903500413e-06, "epoch": 0.9003463799075516, "total_flos": 859381928375992320, "step": 223800 }, { "loss": 3.95125, "learning_rate": 8.265002778104483e-06, "epoch": 0.9007486794517462, "total_flos": 859753789690183680, "step": 223900 }, { "loss": 3.95875, "learning_rate": 8.264191652708551e-06, "epoch": 0.9011509789959408, "total_flos": 860141829048238080, "step": 224000 }, { "loss": 3.915, "learning_rate": 8.263380527312621e-06, "epoch": 0.9015532785401354, "total_flos": 860526809130762240, "step": 224100 }, { "loss": 3.98, "learning_rate": 8.262569401916691e-06, "epoch": 0.90195557808433, "total_flos": 860938069239889920, "step": 224200 }, { "loss": 3.99, "learning_rate": 8.26175827652076e-06, "epoch": 0.9023578776285246, "total_flos": 861324908257198080, "step": 224300 }, { "loss": 3.8775, "learning_rate": 8.260947151124828e-06, "epoch": 0.9027601771727193, "total_flos": 861710148590592000, "step": 224400 }, { "loss": 3.905, "learning_rate": 8.2601360257289e-06, "epoch": 0.9031624767169139, "total_flos": 862094565681438720, "step": 224500 }, { "loss": 3.9775, "learning_rate": 8.259324900332968e-06, "epoch": 0.9035647762611085, "total_flos": 862476868897873920, "step": 224600 }, { "loss": 3.96, "learning_rate": 8.258513774937038e-06, "epoch": 0.9039670758053031, "total_flos": 862854089255485440, "step": 224700 }, { "loss": 3.91625, "learning_rate": 8.257702649541106e-06, "epoch": 0.9043693753494977, "total_flos": 863227411161292800, "step": 224800 }, { "loss": 3.9725, "learning_rate": 8.256891524145176e-06, "epoch": 0.9047716748936924, "total_flos": 863613602207047680, "step": 224900 }, { "loss": 3.995, "learning_rate": 8.256080398749246e-06, "epoch": 0.9051739744378869, "total_flos": 863991082815528960, "step": 225000 }, { "loss": 3.92625, "learning_rate": 8.255269273353315e-06, "epoch": 0.9055762739820816, "total_flos": 864375388370288640, "step": 225100 }, { "loss": 3.975, "learning_rate": 8.254458147957383e-06, "epoch": 0.9059785735262762, "total_flos": 864751642081812480, "step": 225200 }, { "loss": 3.93375, "learning_rate": 8.253647022561455e-06, "epoch": 0.9063808730704708, "total_flos": 865114224655810560, "step": 225300 }, { "loss": 3.93875, "learning_rate": 8.252835897165523e-06, "epoch": 0.9067831726146655, "total_flos": 865510618597908480, "step": 225400 }, { "loss": 3.9125, "learning_rate": 8.252024771769593e-06, "epoch": 0.90718547215886, "total_flos": 865893713189437440, "step": 225500 }, { "loss": 3.98125, "learning_rate": 8.251213646373661e-06, "epoch": 0.9075877717030547, "total_flos": 866270683918663680, "step": 225600 }, { "loss": 3.92875, "learning_rate": 8.250402520977731e-06, "epoch": 0.9079900712472493, "total_flos": 866657119281561600, "step": 225700 }, { "loss": 3.9, "learning_rate": 8.249591395581802e-06, "epoch": 0.9083923707914439, "total_flos": 867040112959488000, "step": 225800 }, { "loss": 3.93625, "learning_rate": 8.24878027018587e-06, "epoch": 0.9087946703356385, "total_flos": 867427897377914880, "step": 225900 }, { "loss": 3.94125, "learning_rate": 8.247969144789938e-06, "epoch": 0.9091969698798331, "total_flos": 867818045299138560, "step": 226000 }, { "loss": 3.99375, "learning_rate": 8.24715801939401e-06, "epoch": 0.9095992694240277, "total_flos": 868195648066191360, "step": 226100 }, { "loss": 3.90875, "learning_rate": 8.246346893998078e-06, "epoch": 0.9100015689682224, "total_flos": 868567928968519680, "step": 226200 }, { "loss": 3.93125, "learning_rate": 8.245535768602148e-06, "epoch": 0.910403868512417, "total_flos": 868951368790794240, "step": 226300 }, { "loss": 3.94375, "learning_rate": 8.244724643206217e-06, "epoch": 0.9108061680566116, "total_flos": 869342403689472000, "step": 226400 }, { "loss": 3.96375, "learning_rate": 8.243913517810287e-06, "epoch": 0.9112084676008062, "total_flos": 869726969495101440, "step": 226500 }, { "loss": 3.93625, "learning_rate": 8.243102392414357e-06, "epoch": 0.9116107671450008, "total_flos": 870103409100103680, "step": 226600 }, { "loss": 3.875, "learning_rate": 8.242291267018425e-06, "epoch": 0.9120130666891955, "total_flos": 870488649433497600, "step": 226700 }, { "loss": 3.88375, "learning_rate": 8.241480141622495e-06, "epoch": 0.91241536623339, "total_flos": 870875828370309120, "step": 226800 }, { "loss": 3.9475, "learning_rate": 8.240669016226565e-06, "epoch": 0.9128176657775847, "total_flos": 871274245895700480, "step": 226900 }, { "loss": 3.9875, "learning_rate": 8.239857890830634e-06, "epoch": 0.9132199653217793, "total_flos": 871653840378593280, "step": 227000 }, { "loss": 3.94, "learning_rate": 8.239046765434704e-06, "epoch": 0.9136222648659739, "total_flos": 872043558089195520, "step": 227100 }, { "loss": 3.9375, "learning_rate": 8.238235640038772e-06, "epoch": 0.9140245644101685, "total_flos": 872431661182156800, "step": 227200 }, { "loss": 3.93625, "learning_rate": 8.237424514642842e-06, "epoch": 0.9144268639543631, "total_flos": 872815839267102720, "step": 227300 }, { "loss": 3.97375, "learning_rate": 8.236613389246912e-06, "epoch": 0.9148291634985578, "total_flos": 873196140145213440, "step": 227400 }, { "loss": 3.90625, "learning_rate": 8.23580226385098e-06, "epoch": 0.9152314630427524, "total_flos": 873573875693322240, "step": 227500 }, { "loss": 3.8975, "learning_rate": 8.23499113845505e-06, "epoch": 0.915633762586947, "total_flos": 873941100293038080, "step": 227600 }, { "loss": 3.8525, "learning_rate": 8.23418001305912e-06, "epoch": 0.9160360621311416, "total_flos": 874337393321533440, "step": 227700 }, { "loss": 3.895, "learning_rate": 8.233368887663189e-06, "epoch": 0.9164383616753362, "total_flos": 874726420570644480, "step": 227800 }, { "loss": 3.9125, "learning_rate": 8.232557762267259e-06, "epoch": 0.9168406612195308, "total_flos": 875117200529694720, "step": 227900 }, { "loss": 3.9, "learning_rate": 8.231746636871327e-06, "epoch": 0.9172429607637255, "total_flos": 875499965824204800, "step": 228000 }, { "loss": 3.96, "learning_rate": 8.230935511475397e-06, "epoch": 0.91764526030792, "total_flos": 875885928486543360, "step": 228100 }, { "loss": 3.955, "learning_rate": 8.230124386079467e-06, "epoch": 0.9180475598521147, "total_flos": 876261194307010560, "step": 228200 }, { "loss": 3.92125, "learning_rate": 8.229313260683536e-06, "epoch": 0.9184498593963093, "total_flos": 876635674063626240, "step": 228300 }, { "loss": 3.935, "learning_rate": 8.228502135287606e-06, "epoch": 0.9188521589405039, "total_flos": 877032907181998080, "step": 228400 }, { "loss": 3.8675, "learning_rate": 8.227691009891676e-06, "epoch": 0.9192544584846986, "total_flos": 877422072523407360, "step": 228500 }, { "loss": 3.8975, "learning_rate": 8.226879884495744e-06, "epoch": 0.9196567580288931, "total_flos": 877813457964072960, "step": 228600 }, { "loss": 3.925, "learning_rate": 8.226068759099814e-06, "epoch": 0.9200590575730878, "total_flos": 878210648592506880, "step": 228700 }, { "loss": 3.87625, "learning_rate": 8.225257633703884e-06, "epoch": 0.9204613571172824, "total_flos": 878584528178749440, "step": 228800 }, { "loss": 3.93, "learning_rate": 8.224446508307953e-06, "epoch": 0.920863656661477, "total_flos": 878988156032778240, "step": 228900 }, { "loss": 3.92875, "learning_rate": 8.223635382912023e-06, "epoch": 0.9212659562056716, "total_flos": 879380938330152960, "step": 229000 }, { "loss": 3.93375, "learning_rate": 8.222824257516091e-06, "epoch": 0.9216682557498662, "total_flos": 879790281080832000, "step": 229100 }, { "loss": 3.92625, "learning_rate": 8.222013132120161e-06, "epoch": 0.9220705552940609, "total_flos": 880169227592171520, "step": 229200 }, { "loss": 3.96625, "learning_rate": 8.221202006724231e-06, "epoch": 0.9224728548382555, "total_flos": 880556693336064000, "step": 229300 }, { "loss": 3.88375, "learning_rate": 8.2203908813283e-06, "epoch": 0.92287515438245, "total_flos": 880957389384376320, "step": 229400 }, { "loss": 3.9225, "learning_rate": 8.21957975593237e-06, "epoch": 0.9232774539266447, "total_flos": 881343251133112320, "step": 229500 }, { "loss": 3.8925, "learning_rate": 8.21876863053644e-06, "epoch": 0.9236797534708393, "total_flos": 881720503358177280, "step": 229600 }, { "loss": 3.86875, "learning_rate": 8.217957505140508e-06, "epoch": 0.924082053015034, "total_flos": 882103465168650240, "step": 229700 }, { "loss": 3.9075, "learning_rate": 8.217146379744578e-06, "epoch": 0.9244843525592286, "total_flos": 882489549989560320, "step": 229800 }, { "loss": 3.94125, "learning_rate": 8.216335254348646e-06, "epoch": 0.9248866521034231, "total_flos": 882871035274690560, "step": 229900 }, { "loss": 3.95, "learning_rate": 8.215524128952716e-06, "epoch": 0.9252889516476178, "total_flos": 883259297704919040, "step": 230000 }, { "loss": 3.9375, "learning_rate": 8.214713003556786e-06, "epoch": 0.9256912511918124, "total_flos": 883652866066145280, "step": 230100 }, { "loss": 3.89375, "learning_rate": 8.213901878160855e-06, "epoch": 0.926093550736007, "total_flos": 884036019081338880, "step": 230200 }, { "loss": 3.8725, "learning_rate": 8.213090752764925e-06, "epoch": 0.9264958502802016, "total_flos": 884413733384478720, "step": 230300 }, { "loss": 3.9, "learning_rate": 8.212279627368995e-06, "epoch": 0.9268981498243962, "total_flos": 884803190844211200, "step": 230400 }, { "loss": 3.98375, "learning_rate": 8.211468501973063e-06, "epoch": 0.9273004493685909, "total_flos": 885164902374481920, "step": 230500 }, { "loss": 3.89125, "learning_rate": 8.210657376577133e-06, "epoch": 0.9277027489127855, "total_flos": 885543806395883520, "step": 230600 }, { "loss": 3.895, "learning_rate": 8.209846251181201e-06, "epoch": 0.92810504845698, "total_flos": 885907833627770880, "step": 230700 }, { "loss": 3.85375, "learning_rate": 8.209035125785271e-06, "epoch": 0.9285073480011747, "total_flos": 886289005549608960, "step": 230800 }, { "loss": 3.8875, "learning_rate": 8.208224000389342e-06, "epoch": 0.9289096475453693, "total_flos": 886683832675246080, "step": 230900 }, { "loss": 3.9175, "learning_rate": 8.20741287499341e-06, "epoch": 0.929311947089564, "total_flos": 887079578645790720, "step": 231000 }, { "loss": 3.95125, "learning_rate": 8.20660174959748e-06, "epoch": 0.9297142466337586, "total_flos": 887461302936821760, "step": 231100 }, { "loss": 3.93875, "learning_rate": 8.20579062420155e-06, "epoch": 0.9301165461779531, "total_flos": 887831395607347200, "step": 231200 }, { "loss": 3.88375, "learning_rate": 8.204979498805618e-06, "epoch": 0.9305188457221478, "total_flos": 888228830552924160, "step": 231300 }, { "loss": 3.93875, "learning_rate": 8.204168373409688e-06, "epoch": 0.9309211452663424, "total_flos": 888612201329049600, "step": 231400 }, { "loss": 3.90125, "learning_rate": 8.203357248013757e-06, "epoch": 0.9313234448105371, "total_flos": 888989804096102400, "step": 231500 }, { "loss": 3.9725, "learning_rate": 8.202546122617827e-06, "epoch": 0.9317257443547317, "total_flos": 889393920584417280, "step": 231600 }, { "loss": 3.90875, "learning_rate": 8.201734997221897e-06, "epoch": 0.9321280438989262, "total_flos": 889786060221480960, "step": 231700 }, { "loss": 3.8825, "learning_rate": 8.200923871825965e-06, "epoch": 0.9325303434431209, "total_flos": 890167428659281920, "step": 231800 }, { "loss": 3.9725, "learning_rate": 8.200112746430035e-06, "epoch": 0.9329326429873155, "total_flos": 890556275326156800, "step": 231900 }, { "loss": 3.88125, "learning_rate": 8.199301621034105e-06, "epoch": 0.9333349425315101, "total_flos": 890932290031779840, "step": 232000 }, { "loss": 3.96125, "learning_rate": 8.198490495638174e-06, "epoch": 0.9337372420757047, "total_flos": 891308485319639040, "step": 232100 }, { "loss": 3.8925, "learning_rate": 8.197679370242244e-06, "epoch": 0.9341395416198993, "total_flos": 891684855878492160, "step": 232200 }, { "loss": 3.92875, "learning_rate": 8.196868244846312e-06, "epoch": 0.934541841164094, "total_flos": 892068269144555520, "step": 232300 }, { "loss": 3.8725, "learning_rate": 8.196057119450382e-06, "epoch": 0.9349441407082886, "total_flos": 892452739347824640, "step": 232400 }, { "loss": 3.8925, "learning_rate": 8.195245994054452e-06, "epoch": 0.9353464402524831, "total_flos": 892833109272084480, "step": 232500 }, { "loss": 3.91, "learning_rate": 8.19443486865852e-06, "epoch": 0.9357487397966778, "total_flos": 893208895594291200, "step": 232600 }, { "loss": 3.89625, "learning_rate": 8.19362374326259e-06, "epoch": 0.9361510393408724, "total_flos": 893597890975948800, "step": 232700 }, { "loss": 3.88625, "learning_rate": 8.19281261786666e-06, "epoch": 0.9365533388850671, "total_flos": 893994338030469120, "step": 232800 }, { "loss": 3.89, "learning_rate": 8.192001492470729e-06, "epoch": 0.9369556384292617, "total_flos": 894385272015544320, "step": 232900 }, { "loss": 3.8625, "learning_rate": 8.191190367074799e-06, "epoch": 0.9373579379734562, "total_flos": 894769896244838400, "step": 233000 }, { "loss": 3.91125, "learning_rate": 8.190379241678869e-06, "epoch": 0.9377602375176509, "total_flos": 895149639442513920, "step": 233100 }, { "loss": 3.90875, "learning_rate": 8.189568116282937e-06, "epoch": 0.9381625370618455, "total_flos": 895524097954160640, "step": 233200 }, { "loss": 3.895, "learning_rate": 8.188756990887007e-06, "epoch": 0.9385648366060402, "total_flos": 895902162799288320, "step": 233300 }, { "loss": 3.86625, "learning_rate": 8.187945865491076e-06, "epoch": 0.9389671361502347, "total_flos": 896273434565591040, "step": 233400 }, { "loss": 3.94125, "learning_rate": 8.187134740095146e-06, "epoch": 0.9393694356944293, "total_flos": 896648588849971200, "step": 233500 }, { "loss": 3.95375, "learning_rate": 8.186323614699216e-06, "epoch": 0.939771735238624, "total_flos": 897028889728081920, "step": 233600 }, { "loss": 3.8675, "learning_rate": 8.185512489303284e-06, "epoch": 0.9401740347828186, "total_flos": 897397022550220800, "step": 233700 }, { "loss": 3.85875, "learning_rate": 8.184701363907354e-06, "epoch": 0.9405763343270132, "total_flos": 897770817156587520, "step": 233800 }, { "loss": 3.88375, "learning_rate": 8.183890238511424e-06, "epoch": 0.9409786338712078, "total_flos": 898158091695759360, "step": 233900 }, { "loss": 3.8875, "learning_rate": 8.183079113115493e-06, "epoch": 0.9413809334154024, "total_flos": 898543613524992000, "step": 234000 }, { "loss": 3.91125, "learning_rate": 8.182267987719563e-06, "epoch": 0.9417832329595971, "total_flos": 898944761028894720, "step": 234100 }, { "loss": 3.88875, "learning_rate": 8.181456862323631e-06, "epoch": 0.9421855325037917, "total_flos": 899336709461237760, "step": 234200 }, { "loss": 3.92625, "learning_rate": 8.180645736927701e-06, "epoch": 0.9425878320479862, "total_flos": 899724159271403520, "step": 234300 }, { "loss": 3.85, "learning_rate": 8.179834611531771e-06, "epoch": 0.9429901315921809, "total_flos": 900094756509941760, "step": 234400 }, { "loss": 3.9325, "learning_rate": 8.17902348613584e-06, "epoch": 0.9433924311363755, "total_flos": 900479232024453120, "step": 234500 }, { "loss": 3.90125, "learning_rate": 8.17821236073991e-06, "epoch": 0.9437947306805702, "total_flos": 900857280935854080, "step": 234600 }, { "loss": 3.915, "learning_rate": 8.17740123534398e-06, "epoch": 0.9441970302247648, "total_flos": 901225222553272320, "step": 234700 }, { "loss": 3.89375, "learning_rate": 8.176590109948048e-06, "epoch": 0.9445993297689593, "total_flos": 901606994645483520, "step": 234800 }, { "loss": 3.8775, "learning_rate": 8.175778984552118e-06, "epoch": 0.945001629313154, "total_flos": 901975786061660160, "step": 234900 }, { "loss": 3.87125, "learning_rate": 8.174967859156186e-06, "epoch": 0.9454039288573486, "total_flos": 902379164287303680, "step": 235000 }, { "loss": 3.8175, "learning_rate": 8.174156733760256e-06, "epoch": 0.9458062284015433, "total_flos": 902759566079016960, "step": 235100 }, { "loss": 3.8475, "learning_rate": 8.173345608364326e-06, "epoch": 0.9462085279457378, "total_flos": 903137646857871360, "step": 235200 }, { "loss": 3.9, "learning_rate": 8.172534482968395e-06, "epoch": 0.9466108274899324, "total_flos": 903538534110904320, "step": 235300 }, { "loss": 3.89625, "learning_rate": 8.171723357572465e-06, "epoch": 0.9470131270341271, "total_flos": 903907553910497280, "step": 235400 }, { "loss": 3.90625, "learning_rate": 8.170912232176535e-06, "epoch": 0.9474154265783217, "total_flos": 904277577534873600, "step": 235500 }, { "loss": 3.82625, "learning_rate": 8.170101106780603e-06, "epoch": 0.9478177261225164, "total_flos": 904662860358205440, "step": 235600 }, { "loss": 3.8575, "learning_rate": 8.169289981384673e-06, "epoch": 0.9482200256667109, "total_flos": 905054697254461440, "step": 235700 }, { "loss": 3.90375, "learning_rate": 8.168478855988741e-06, "epoch": 0.9486223252109055, "total_flos": 905444133469224960, "step": 235800 }, { "loss": 3.86, "learning_rate": 8.167667730592811e-06, "epoch": 0.9490246247551002, "total_flos": 905833649352622080, "step": 235900 }, { "loss": 3.8475, "learning_rate": 8.166856605196882e-06, "epoch": 0.9494269242992948, "total_flos": 906228986357514240, "step": 236000 }, { "loss": 3.9175, "learning_rate": 8.16604547980095e-06, "epoch": 0.9498292238434893, "total_flos": 906601872741457920, "step": 236100 }, { "loss": 3.86, "learning_rate": 8.16523435440502e-06, "epoch": 0.950231523387684, "total_flos": 906985121359011840, "step": 236200 }, { "loss": 3.8975, "learning_rate": 8.16442322900909e-06, "epoch": 0.9506338229318786, "total_flos": 907379890060984320, "step": 236300 }, { "loss": 3.93125, "learning_rate": 8.163612103613158e-06, "epoch": 0.9510361224760733, "total_flos": 907751905401200640, "step": 236400 }, { "loss": 3.83875, "learning_rate": 8.162800978217228e-06, "epoch": 0.9514384220202678, "total_flos": 908148134694789120, "step": 236500 }, { "loss": 3.87375, "learning_rate": 8.161989852821297e-06, "epoch": 0.9518407215644624, "total_flos": 908544358677135360, "step": 236600 }, { "loss": 3.89625, "learning_rate": 8.161178727425367e-06, "epoch": 0.9522430211086571, "total_flos": 908940465812152320, "step": 236700 }, { "loss": 3.865, "learning_rate": 8.160367602029437e-06, "epoch": 0.9526453206528517, "total_flos": 909309400631869440, "step": 236800 }, { "loss": 3.87625, "learning_rate": 8.159556476633505e-06, "epoch": 0.9530476201970464, "total_flos": 909686010196623360, "step": 236900 }, { "loss": 3.855, "learning_rate": 8.158745351237575e-06, "epoch": 0.9534499197412409, "total_flos": 910073842416230400, "step": 237000 }, { "loss": 3.87125, "learning_rate": 8.157934225841645e-06, "epoch": 0.9538522192854355, "total_flos": 910449915545518080, "step": 237100 }, { "loss": 3.87125, "learning_rate": 8.157123100445714e-06, "epoch": 0.9542545188296302, "total_flos": 910830221734871040, "step": 237200 }, { "loss": 3.90125, "learning_rate": 8.156311975049784e-06, "epoch": 0.9546568183738248, "total_flos": 911222839383736320, "step": 237300 }, { "loss": 3.84625, "learning_rate": 8.155500849653852e-06, "epoch": 0.9550591179180195, "total_flos": 911596426851655680, "step": 237400 }, { "loss": 3.835, "learning_rate": 8.154689724257922e-06, "epoch": 0.955461417462214, "total_flos": 911977126072934400, "step": 237500 }, { "loss": 3.8925, "learning_rate": 8.153878598861992e-06, "epoch": 0.9558637170064086, "total_flos": 912344350672650240, "step": 237600 }, { "loss": 3.85125, "learning_rate": 8.15306747346606e-06, "epoch": 0.9562660165506033, "total_flos": 912726871650017280, "step": 237700 }, { "loss": 3.88875, "learning_rate": 8.15225634807013e-06, "epoch": 0.9566683160947979, "total_flos": 913102886355640320, "step": 237800 }, { "loss": 3.86375, "learning_rate": 8.1514452226742e-06, "epoch": 0.9570706156389924, "total_flos": 913495148151275520, "step": 237900 }, { "loss": 3.88875, "learning_rate": 8.150634097278269e-06, "epoch": 0.9574729151831871, "total_flos": 913884589677281280, "step": 238000 }, { "loss": 3.90875, "learning_rate": 8.149822971882339e-06, "epoch": 0.9578752147273817, "total_flos": 914253774125383680, "step": 238100 }, { "loss": 3.8625, "learning_rate": 8.149011846486409e-06, "epoch": 0.9582775142715764, "total_flos": 914625045891686400, "step": 238200 }, { "loss": 3.8225, "learning_rate": 8.148200721090477e-06, "epoch": 0.9586798138157709, "total_flos": 915025715383787520, "step": 238300 }, { "loss": 3.8675, "learning_rate": 8.147389595694547e-06, "epoch": 0.9590821133599655, "total_flos": 915408454122086400, "step": 238400 }, { "loss": 3.855, "learning_rate": 8.146578470298616e-06, "epoch": 0.9594844129041602, "total_flos": 915797906270576640, "step": 238500 }, { "loss": 3.87, "learning_rate": 8.145767344902686e-06, "epoch": 0.9598867124483548, "total_flos": 916217781214617600, "step": 238600 }, { "loss": 3.895, "learning_rate": 8.144956219506756e-06, "epoch": 0.9602890119925495, "total_flos": 916592919565271040, "step": 238700 }, { "loss": 3.80875, "learning_rate": 8.144145094110824e-06, "epoch": 0.960691311536744, "total_flos": 916981755609661440, "step": 238800 }, { "loss": 3.8775, "learning_rate": 8.143333968714894e-06, "epoch": 0.9610936110809386, "total_flos": 917368419355975680, "step": 238900 }, { "loss": 3.80625, "learning_rate": 8.142522843318964e-06, "epoch": 0.9614959106251333, "total_flos": 917756867679682560, "step": 239000 }, { "loss": 3.885, "learning_rate": 8.141711717923033e-06, "epoch": 0.9618982101693279, "total_flos": 918145395672023040, "step": 239100 }, { "loss": 3.8475, "learning_rate": 8.140900592527103e-06, "epoch": 0.9623005097135225, "total_flos": 918520916432117760, "step": 239200 }, { "loss": 3.86625, "learning_rate": 8.140089467131171e-06, "epoch": 0.9627028092577171, "total_flos": 918900856145756160, "step": 239300 }, { "loss": 3.8175, "learning_rate": 8.139278341735241e-06, "epoch": 0.9631051088019117, "total_flos": 919279356512747520, "step": 239400 }, { "loss": 3.79, "learning_rate": 8.138467216339311e-06, "epoch": 0.9635074083461064, "total_flos": 919648833079173120, "step": 239500 }, { "loss": 3.85, "learning_rate": 8.13765609094338e-06, "epoch": 0.963909707890301, "total_flos": 920006518687825920, "step": 239600 }, { "loss": 3.8375, "learning_rate": 8.13684496554745e-06, "epoch": 0.9643120074344955, "total_flos": 920397133998366720, "step": 239700 }, { "loss": 3.86375, "learning_rate": 8.13603384015152e-06, "epoch": 0.9647143069786902, "total_flos": 920774970460078080, "step": 239800 }, { "loss": 3.875, "learning_rate": 8.135222714755588e-06, "epoch": 0.9651166065228848, "total_flos": 921165941623848960, "step": 239900 }, { "loss": 3.8025, "learning_rate": 8.134411589359658e-06, "epoch": 0.9655189060670795, "total_flos": 921546194700779520, "step": 240000 }, { "loss": 3.8275, "learning_rate": 8.133600463963726e-06, "epoch": 0.965921205611274, "total_flos": 921935131658772480, "step": 240100 }, { "loss": 3.85, "learning_rate": 8.132789338567796e-06, "epoch": 0.9663235051554686, "total_flos": 922322369019248640, "step": 240200 }, { "loss": 3.84125, "learning_rate": 8.131978213171866e-06, "epoch": 0.9667258046996633, "total_flos": 922702011303321600, "step": 240300 }, { "loss": 3.855, "learning_rate": 8.131167087775935e-06, "epoch": 0.9671281042438579, "total_flos": 923085525482987520, "step": 240400 }, { "loss": 3.87125, "learning_rate": 8.130355962380005e-06, "epoch": 0.9675304037880526, "total_flos": 923460467317678080, "step": 240500 }, { "loss": 3.8325, "learning_rate": 8.129544836984075e-06, "epoch": 0.9679327033322471, "total_flos": 923852511352381440, "step": 240600 }, { "loss": 3.86375, "learning_rate": 8.128733711588143e-06, "epoch": 0.9683350028764417, "total_flos": 924234740211425280, "step": 240700 }, { "loss": 3.81625, "learning_rate": 8.127922586192213e-06, "epoch": 0.9687373024206364, "total_flos": 924602586226483200, "step": 240800 }, { "loss": 3.9025, "learning_rate": 8.127111460796281e-06, "epoch": 0.969139601964831, "total_flos": 924981947014717440, "step": 240900 }, { "loss": 3.78375, "learning_rate": 8.126300335400351e-06, "epoch": 0.9695419015090256, "total_flos": 925367261705502720, "step": 241000 }, { "loss": 3.85625, "learning_rate": 8.125489210004422e-06, "epoch": 0.9699442010532202, "total_flos": 925750813063864320, "step": 241100 }, { "loss": 3.83125, "learning_rate": 8.12467808460849e-06, "epoch": 0.9703465005974148, "total_flos": 926136621700177920, "step": 241200 }, { "loss": 3.84375, "learning_rate": 8.12386695921256e-06, "epoch": 0.9707488001416095, "total_flos": 926517113783009280, "step": 241300 }, { "loss": 3.88125, "learning_rate": 8.12305583381663e-06, "epoch": 0.971151099685804, "total_flos": 926906454395412480, "step": 241400 }, { "loss": 3.80375, "learning_rate": 8.122244708420698e-06, "epoch": 0.9715533992299987, "total_flos": 927280132154449920, "step": 241500 }, { "loss": 3.84875, "learning_rate": 8.121433583024768e-06, "epoch": 0.9719556987741933, "total_flos": 927666864946913280, "step": 241600 }, { "loss": 3.88625, "learning_rate": 8.120622457628837e-06, "epoch": 0.9723579983183879, "total_flos": 928049667420119040, "step": 241700 }, { "loss": 3.8625, "learning_rate": 8.119811332232907e-06, "epoch": 0.9727602978625826, "total_flos": 928430356018913280, "step": 241800 }, { "loss": 3.815, "learning_rate": 8.119000206836977e-06, "epoch": 0.9731625974067771, "total_flos": 928818937123676160, "step": 241900 }, { "loss": 3.815, "learning_rate": 8.118189081441045e-06, "epoch": 0.9735648969509717, "total_flos": 929198133263400960, "step": 242000 }, { "loss": 3.87625, "learning_rate": 8.117377956045115e-06, "epoch": 0.9739671964951664, "total_flos": 929577871149834240, "step": 242100 }, { "loss": 3.835, "learning_rate": 8.116566830649185e-06, "epoch": 0.974369496039361, "total_flos": 929963924103290880, "step": 242200 }, { "loss": 3.84375, "learning_rate": 8.115755705253254e-06, "epoch": 0.9747717955835556, "total_flos": 930354773108490240, "step": 242300 }, { "loss": 3.8175, "learning_rate": 8.114944579857324e-06, "epoch": 0.9751740951277502, "total_flos": 930735541375918080, "step": 242400 }, { "loss": 3.82125, "learning_rate": 8.114133454461394e-06, "epoch": 0.9755763946719448, "total_flos": 931124642982420480, "step": 242500 }, { "loss": 3.8975, "learning_rate": 8.113322329065462e-06, "epoch": 0.9759786942161395, "total_flos": 931505124442767360, "step": 242600 }, { "loss": 3.85875, "learning_rate": 8.112511203669532e-06, "epoch": 0.976380993760334, "total_flos": 931895946891755520, "step": 242700 }, { "loss": 3.85375, "learning_rate": 8.1117000782736e-06, "epoch": 0.9767832933045287, "total_flos": 932269178506444800, "step": 242800 }, { "loss": 3.81875, "learning_rate": 8.110888952877672e-06, "epoch": 0.9771855928487233, "total_flos": 932650987777351680, "step": 242900 }, { "loss": 3.83875, "learning_rate": 8.11007782748174e-06, "epoch": 0.9775878923929179, "total_flos": 933035744787701760, "step": 243000 }, { "loss": 3.8725, "learning_rate": 8.109266702085809e-06, "epoch": 0.9779901919371126, "total_flos": 933409045448540160, "step": 243100 }, { "loss": 3.8525, "learning_rate": 8.108455576689879e-06, "epoch": 0.9783924914813071, "total_flos": 933806124540887040, "step": 243200 }, { "loss": 3.88875, "learning_rate": 8.107644451293949e-06, "epoch": 0.9787947910255018, "total_flos": 934184412458188800, "step": 243300 }, { "loss": 3.81375, "learning_rate": 8.106833325898017e-06, "epoch": 0.9791970905696964, "total_flos": 934568038173941760, "step": 243400 }, { "loss": 3.83625, "learning_rate": 8.106022200502087e-06, "epoch": 0.979599390113891, "total_flos": 934949799643668480, "step": 243500 }, { "loss": 3.83, "learning_rate": 8.105211075106156e-06, "epoch": 0.9800016896580857, "total_flos": 935335634836193280, "step": 243600 }, { "loss": 3.87125, "learning_rate": 8.104399949710227e-06, "epoch": 0.9804039892022802, "total_flos": 935725904915988480, "step": 243700 }, { "loss": 3.805, "learning_rate": 8.103588824314296e-06, "epoch": 0.9808062887464748, "total_flos": 936105934920744960, "step": 243800 }, { "loss": 3.84875, "learning_rate": 8.102777698918364e-06, "epoch": 0.9812085882906695, "total_flos": 936480759908106240, "step": 243900 }, { "loss": 3.78875, "learning_rate": 8.101966573522434e-06, "epoch": 0.981610887834864, "total_flos": 936873382868213760, "step": 244000 }, { "loss": 3.8025, "learning_rate": 8.101155448126504e-06, "epoch": 0.9820131873790587, "total_flos": 937242726653583360, "step": 244100 }, { "loss": 3.82625, "learning_rate": 8.100344322730574e-06, "epoch": 0.9824154869232533, "total_flos": 937617923427901440, "step": 244200 }, { "loss": 3.83625, "learning_rate": 8.099533197334643e-06, "epoch": 0.9828177864674479, "total_flos": 937981817878732800, "step": 244300 }, { "loss": 3.8775, "learning_rate": 8.098722071938711e-06, "epoch": 0.9832200860116426, "total_flos": 938380830263255040, "step": 244400 }, { "loss": 3.85, "learning_rate": 8.097910946542783e-06, "epoch": 0.9836223855558371, "total_flos": 938781085478461440, "step": 244500 }, { "loss": 3.83125, "learning_rate": 8.097099821146851e-06, "epoch": 0.9840246851000318, "total_flos": 939162400803840000, "step": 244600 }, { "loss": 3.80125, "learning_rate": 8.09628869575092e-06, "epoch": 0.9844269846442264, "total_flos": 939559023129354240, "step": 244700 }, { "loss": 3.78875, "learning_rate": 8.09547757035499e-06, "epoch": 0.984829284188421, "total_flos": 939949112626913280, "step": 244800 }, { "loss": 3.79125, "learning_rate": 8.09466644495906e-06, "epoch": 0.9852315837326157, "total_flos": 940354078913986560, "step": 244900 }, { "loss": 3.82, "learning_rate": 8.09385531956313e-06, "epoch": 0.9856338832768102, "total_flos": 940733540615823360, "step": 245000 }, { "loss": 3.8625, "learning_rate": 8.093044194167198e-06, "epoch": 0.9860361828210049, "total_flos": 941130417880965120, "step": 245100 }, { "loss": 3.8, "learning_rate": 8.092233068771266e-06, "epoch": 0.9864384823651995, "total_flos": 941518186365665280, "step": 245200 }, { "loss": 3.8425, "learning_rate": 8.091421943375338e-06, "epoch": 0.9868407819093941, "total_flos": 941895263319736320, "step": 245300 }, { "loss": 3.86375, "learning_rate": 8.090610817979406e-06, "epoch": 0.9872430814535887, "total_flos": 942272924510453760, "step": 245400 }, { "loss": 3.8375, "learning_rate": 8.089799692583475e-06, "epoch": 0.9876453809977833, "total_flos": 942665212862300160, "step": 245500 }, { "loss": 3.83125, "learning_rate": 8.088988567187545e-06, "epoch": 0.9880476805419779, "total_flos": 943037844306616320, "step": 245600 }, { "loss": 3.81125, "learning_rate": 8.088177441791615e-06, "epoch": 0.9884499800861726, "total_flos": 943420848607027200, "step": 245700 }, { "loss": 3.78875, "learning_rate": 8.087366316395685e-06, "epoch": 0.9888522796303671, "total_flos": 943819324556083200, "step": 245800 }, { "loss": 3.86625, "learning_rate": 8.086555190999753e-06, "epoch": 0.9892545791745618, "total_flos": 944190415740149760, "step": 245900 }, { "loss": 3.845, "learning_rate": 8.085744065603821e-06, "epoch": 0.9896568787187564, "total_flos": 944577902729011200, "step": 246000 }, { "loss": 3.7775, "learning_rate": 8.084932940207893e-06, "epoch": 0.990059178262951, "total_flos": 944968549907005440, "step": 246100 }, { "loss": 3.85375, "learning_rate": 8.084121814811962e-06, "epoch": 0.9904614778071457, "total_flos": 945359606050652160, "step": 246200 }, { "loss": 3.84625, "learning_rate": 8.08331068941603e-06, "epoch": 0.9908637773513402, "total_flos": 945763642870333440, "step": 246300 }, { "loss": 3.78125, "learning_rate": 8.0824995640201e-06, "epoch": 0.9912660768955349, "total_flos": 946149462129131520, "step": 246400 }, { "loss": 3.81375, "learning_rate": 8.08168843862417e-06, "epoch": 0.9916683764397295, "total_flos": 946525423722332160, "step": 246500 }, { "loss": 3.8, "learning_rate": 8.08087731322824e-06, "epoch": 0.9920706759839241, "total_flos": 946911800661565440, "step": 246600 }, { "loss": 3.8275, "learning_rate": 8.080066187832308e-06, "epoch": 0.9924729755281188, "total_flos": 947291411078184960, "step": 246700 }, { "loss": 3.8725, "learning_rate": 8.079255062436377e-06, "epoch": 0.9928752750723133, "total_flos": 947673427487539200, "step": 246800 }, { "loss": 3.835, "learning_rate": 8.078443937040448e-06, "epoch": 0.993277574616508, "total_flos": 948071584762060800, "step": 246900 }, { "loss": 3.84125, "learning_rate": 8.077632811644517e-06, "epoch": 0.9936798741607026, "total_flos": 948440413356933120, "step": 247000 }, { "loss": 3.83125, "learning_rate": 8.076821686248587e-06, "epoch": 0.9940821737048972, "total_flos": 948817862097960960, "step": 247100 }, { "loss": 3.86125, "learning_rate": 8.076010560852657e-06, "epoch": 0.9944844732490918, "total_flos": 949184603374632960, "step": 247200 }, { "loss": 3.83375, "learning_rate": 8.075199435456725e-06, "epoch": 0.9948867727932864, "total_flos": 949566301109452800, "step": 247300 }, { "loss": 3.81, "learning_rate": 8.074388310060795e-06, "epoch": 0.9952890723374811, "total_flos": 949941614731100160, "step": 247400 }, { "loss": 3.85625, "learning_rate": 8.073577184664864e-06, "epoch": 0.9956913718816757, "total_flos": 950334200512512000, "step": 247500 }, { "loss": 3.81625, "learning_rate": 8.072766059268934e-06, "epoch": 0.9960936714258702, "total_flos": 950723737640878080, "step": 247600 }, { "loss": 3.80625, "learning_rate": 8.071954933873004e-06, "epoch": 0.9964959709700649, "total_flos": 951114055521853440, "step": 247700 }, { "loss": 3.80875, "learning_rate": 8.071143808477072e-06, "epoch": 0.9968982705142595, "total_flos": 951497399741767680, "step": 247800 }, { "loss": 3.85, "learning_rate": 8.070332683081142e-06, "epoch": 0.9973005700584541, "total_flos": 951877164184412160, "step": 247900 }, { "loss": 3.8525, "learning_rate": 8.069521557685212e-06, "epoch": 0.9977028696026488, "total_flos": 952256105384509440, "step": 248000 }, { "loss": 3.805, "learning_rate": 8.06871043228928e-06, "epoch": 0.9981051691468433, "total_flos": 952632008554045440, "step": 248100 }, { "loss": 3.82, "learning_rate": 8.06789930689335e-06, "epoch": 0.998507468691038, "total_flos": 952999721788047360, "step": 248200 }, { "loss": 3.84375, "learning_rate": 8.067088181497419e-06, "epoch": 0.9989097682352326, "total_flos": 953383968919142400, "step": 248300 }, { "loss": 3.80125, "learning_rate": 8.066277056101489e-06, "epoch": 0.9993120677794272, "total_flos": 953753987232276480, "step": 248400 }, { "loss": 3.845, "learning_rate": 8.065465930705559e-06, "epoch": 0.9997143673236218, "total_flos": 954118667746959360, "step": 248500 }, { "loss": 3.7525, "learning_rate": 8.064654805309627e-06, "epoch": 1.0001166668678165, "total_flos": 954507907445760000, "step": 248600 }, { "loss": 3.835, "learning_rate": 8.063843679913697e-06, "epoch": 1.000518966412011, "total_flos": 954908178594693120, "step": 248700 }, { "loss": 3.8425, "learning_rate": 8.063032554517767e-06, "epoch": 1.0009212659562057, "total_flos": 955286875477647360, "step": 248800 }, { "loss": 3.82625, "learning_rate": 8.062221429121836e-06, "epoch": 1.0013235655004002, "total_flos": 955670591484518400, "step": 248900 }, { "loss": 3.775, "learning_rate": 8.061410303725906e-06, "epoch": 1.0017258650445948, "total_flos": 956046845196042240, "step": 249000 }, { "loss": 3.8175, "learning_rate": 8.060599178329974e-06, "epoch": 1.0021281645887896, "total_flos": 956446123142676480, "step": 249100 }, { "loss": 3.8475, "learning_rate": 8.059788052934044e-06, "epoch": 1.0025304641329842, "total_flos": 956821511121715200, "step": 249200 }, { "loss": 3.84625, "learning_rate": 8.058976927538114e-06, "epoch": 1.0029327636771788, "total_flos": 957220390725181440, "step": 249300 }, { "loss": 3.8325, "learning_rate": 8.058165802142183e-06, "epoch": 1.0033350632213733, "total_flos": 957616588151316480, "step": 249400 }, { "loss": 3.7925, "learning_rate": 8.057354676746253e-06, "epoch": 1.003737362765568, "total_flos": 957994732665077760, "step": 249500 }, { "loss": 3.795, "learning_rate": 8.056543551350323e-06, "epoch": 1.0041396623097627, "total_flos": 958379218802073600, "step": 249600 }, { "loss": 3.86, "learning_rate": 8.055732425954391e-06, "epoch": 1.0045419618539573, "total_flos": 958774364602245120, "step": 249700 }, { "loss": 3.785, "learning_rate": 8.054921300558461e-06, "epoch": 1.0049442613981519, "total_flos": 959155833953648640, "step": 249800 }, { "loss": 3.80375, "learning_rate": 8.05411017516253e-06, "epoch": 1.0053465609423464, "total_flos": 959535311589212160, "step": 249900 }, { "loss": 3.79125, "learning_rate": 8.0532990497666e-06, "epoch": 1.005748860486541, "total_flos": 959906588666757120, "step": 250000 }, { "loss": 3.82, "learning_rate": 8.05248792437067e-06, "epoch": 1.0061511600307358, "total_flos": 960279777791508480, "step": 250100 }, { "loss": 3.79875, "learning_rate": 8.051676798974738e-06, "epoch": 1.0065534595749304, "total_flos": 960661613618626560, "step": 250200 }, { "loss": 3.7875, "learning_rate": 8.050865673578808e-06, "epoch": 1.006955759119125, "total_flos": 961041006274314240, "step": 250300 }, { "loss": 3.77125, "learning_rate": 8.050054548182878e-06, "epoch": 1.0073580586633195, "total_flos": 961423431649320960, "step": 250400 }, { "loss": 3.81125, "learning_rate": 8.049243422786946e-06, "epoch": 1.007760358207514, "total_flos": 961804847888302080, "step": 250500 }, { "loss": 3.77875, "learning_rate": 8.048432297391016e-06, "epoch": 1.0081626577517087, "total_flos": 962190072287969280, "step": 250600 }, { "loss": 3.7725, "learning_rate": 8.047621171995085e-06, "epoch": 1.0085649572959035, "total_flos": 962576964417699840, "step": 250700 }, { "loss": 3.7675, "learning_rate": 8.046810046599155e-06, "epoch": 1.008967256840098, "total_flos": 962958439080345600, "step": 250800 }, { "loss": 3.82125, "learning_rate": 8.045998921203225e-06, "epoch": 1.0093695563842926, "total_flos": 963349617382563840, "step": 250900 }, { "loss": 3.86125, "learning_rate": 8.045187795807293e-06, "epoch": 1.0097718559284872, "total_flos": 963741863244472320, "step": 251000 }, { "loss": 3.77625, "learning_rate": 8.044376670411363e-06, "epoch": 1.0101741554726817, "total_flos": 964122047275253760, "step": 251100 }, { "loss": 3.85, "learning_rate": 8.043565545015433e-06, "epoch": 1.0105764550168765, "total_flos": 964509056252313600, "step": 251200 }, { "loss": 3.79125, "learning_rate": 8.042754419619502e-06, "epoch": 1.0109787545610711, "total_flos": 964896192699187200, "step": 251300 }, { "loss": 3.82125, "learning_rate": 8.041943294223572e-06, "epoch": 1.0113810541052657, "total_flos": 965288560719667200, "step": 251400 }, { "loss": 3.84125, "learning_rate": 8.04113216882764e-06, "epoch": 1.0117833536494603, "total_flos": 965670343434362880, "step": 251500 }, { "loss": 3.805, "learning_rate": 8.04032104343171e-06, "epoch": 1.0121856531936548, "total_flos": 966053937282662400, "step": 251600 }, { "loss": 3.7925, "learning_rate": 8.03950991803578e-06, "epoch": 1.0125879527378496, "total_flos": 966425750795673600, "step": 251700 }, { "loss": 3.79125, "learning_rate": 8.038698792639848e-06, "epoch": 1.0129902522820442, "total_flos": 966807368861859840, "step": 251800 }, { "loss": 3.78, "learning_rate": 8.037887667243918e-06, "epoch": 1.0133925518262388, "total_flos": 967201425857372160, "step": 251900 }, { "loss": 3.78375, "learning_rate": 8.037076541847988e-06, "epoch": 1.0137948513704333, "total_flos": 967587361963499520, "step": 252000 }, { "loss": 3.81625, "learning_rate": 8.036265416452057e-06, "epoch": 1.014197150914628, "total_flos": 967979947744911360, "step": 252100 }, { "loss": 3.7425, "learning_rate": 8.035454291056127e-06, "epoch": 1.0145994504588227, "total_flos": 968344766351892480, "step": 252200 }, { "loss": 3.8225, "learning_rate": 8.034643165660197e-06, "epoch": 1.0150017500030173, "total_flos": 968730622789386240, "step": 252300 }, { "loss": 3.7925, "learning_rate": 8.033832040264265e-06, "epoch": 1.0154040495472119, "total_flos": 969112676377436160, "step": 252400 }, { "loss": 3.7725, "learning_rate": 8.033020914868335e-06, "epoch": 1.0158063490914064, "total_flos": 969498012313190400, "step": 252500 }, { "loss": 3.78625, "learning_rate": 8.032209789472404e-06, "epoch": 1.016208648635601, "total_flos": 969890895524167680, "step": 252600 }, { "loss": 3.79625, "learning_rate": 8.031398664076474e-06, "epoch": 1.0166109481797958, "total_flos": 970268901945630720, "step": 252700 }, { "loss": 3.845, "learning_rate": 8.030587538680544e-06, "epoch": 1.0170132477239904, "total_flos": 970661508972011520, "step": 252800 }, { "loss": 3.8075, "learning_rate": 8.029776413284612e-06, "epoch": 1.017415547268185, "total_flos": 971049920117022720, "step": 252900 }, { "loss": 3.80875, "learning_rate": 8.028965287888682e-06, "epoch": 1.0178178468123795, "total_flos": 971437853250232320, "step": 253000 }, { "loss": 3.78625, "learning_rate": 8.028154162492752e-06, "epoch": 1.018220146356574, "total_flos": 971824060229713920, "step": 253100 }, { "loss": 3.71375, "learning_rate": 8.02734303709682e-06, "epoch": 1.018622445900769, "total_flos": 972209029689753600, "step": 253200 }, { "loss": 3.8675, "learning_rate": 8.02653191170089e-06, "epoch": 1.0190247454449635, "total_flos": 972589043760783360, "step": 253300 }, { "loss": 3.81375, "learning_rate": 8.025720786304959e-06, "epoch": 1.019427044989158, "total_flos": 972979914010951680, "step": 253400 }, { "loss": 3.78375, "learning_rate": 8.024909660909029e-06, "epoch": 1.0198293445333526, "total_flos": 973368797856522240, "step": 253500 }, { "loss": 3.77375, "learning_rate": 8.024098535513099e-06, "epoch": 1.0202316440775472, "total_flos": 973760215164641280, "step": 253600 }, { "loss": 3.7475, "learning_rate": 8.023287410117167e-06, "epoch": 1.020633943621742, "total_flos": 974163519032893440, "step": 253700 }, { "loss": 3.77625, "learning_rate": 8.022476284721237e-06, "epoch": 1.0210362431659366, "total_flos": 974548700942622720, "step": 253800 }, { "loss": 3.785, "learning_rate": 8.021665159325307e-06, "epoch": 1.0214385427101311, "total_flos": 974934355552911360, "step": 253900 }, { "loss": 3.73, "learning_rate": 8.020854033929376e-06, "epoch": 1.0218408422543257, "total_flos": 975332943038054400, "step": 254000 }, { "loss": 3.77625, "learning_rate": 8.020042908533446e-06, "epoch": 1.0222431417985203, "total_flos": 975707104120135680, "step": 254100 }, { "loss": 3.83, "learning_rate": 8.019231783137514e-06, "epoch": 1.022645441342715, "total_flos": 976090034063155200, "step": 254200 }, { "loss": 3.775, "learning_rate": 8.018420657741584e-06, "epoch": 1.0230477408869096, "total_flos": 976472156697354240, "step": 254300 }, { "loss": 3.78125, "learning_rate": 8.017609532345654e-06, "epoch": 1.0234500404311042, "total_flos": 976853089613291520, "step": 254400 }, { "loss": 3.77, "learning_rate": 8.016798406949723e-06, "epoch": 1.0238523399752988, "total_flos": 977237182718361600, "step": 254500 }, { "loss": 3.79, "learning_rate": 8.015987281553793e-06, "epoch": 1.0242546395194934, "total_flos": 977628164504616960, "step": 254600 }, { "loss": 3.8375, "learning_rate": 8.015176156157863e-06, "epoch": 1.024656939063688, "total_flos": 978007838656143360, "step": 254700 }, { "loss": 3.835, "learning_rate": 8.014365030761931e-06, "epoch": 1.0250592386078827, "total_flos": 978389908177920000, "step": 254800 }, { "loss": 3.775, "learning_rate": 8.013553905366001e-06, "epoch": 1.0254615381520773, "total_flos": 978780709381939200, "step": 254900 }, { "loss": 3.7775, "learning_rate": 8.01274277997007e-06, "epoch": 1.0258638376962719, "total_flos": 979152273266565120, "step": 255000 }, { "loss": 3.755, "learning_rate": 8.01193165457414e-06, "epoch": 1.0262661372404664, "total_flos": 979517437104291840, "step": 255100 }, { "loss": 3.77, "learning_rate": 8.01112052917821e-06, "epoch": 1.026668436784661, "total_flos": 979896341125693440, "step": 255200 }, { "loss": 3.77125, "learning_rate": 8.010309403782278e-06, "epoch": 1.0270707363288558, "total_flos": 980273566794547200, "step": 255300 }, { "loss": 3.81, "learning_rate": 8.009498278386348e-06, "epoch": 1.0274730358730504, "total_flos": 980668266450370560, "step": 255400 }, { "loss": 3.7275, "learning_rate": 8.008687152990418e-06, "epoch": 1.027875335417245, "total_flos": 981032596423065600, "step": 255500 }, { "loss": 3.76875, "learning_rate": 8.007876027594486e-06, "epoch": 1.0282776349614395, "total_flos": 981398965912780800, "step": 255600 }, { "loss": 3.76875, "learning_rate": 8.007064902198556e-06, "epoch": 1.028679934505634, "total_flos": 981790494756986880, "step": 255700 }, { "loss": 3.765, "learning_rate": 8.006253776802625e-06, "epoch": 1.029082234049829, "total_flos": 982186277906227200, "step": 255800 }, { "loss": 3.79375, "learning_rate": 8.005442651406695e-06, "epoch": 1.0294845335940235, "total_flos": 982571284544962560, "step": 255900 }, { "loss": 3.79375, "learning_rate": 8.004631526010765e-06, "epoch": 1.029886833138218, "total_flos": 982952318374502400, "step": 256000 }, { "loss": 3.795, "learning_rate": 8.003820400614833e-06, "epoch": 1.0302891326824126, "total_flos": 983329421884784640, "step": 256100 }, { "loss": 3.77125, "learning_rate": 8.003009275218903e-06, "epoch": 1.0306914322266072, "total_flos": 983720844504145920, "step": 256200 }, { "loss": 3.77125, "learning_rate": 8.002198149822973e-06, "epoch": 1.031093731770802, "total_flos": 984117753636741120, "step": 256300 }, { "loss": 3.79125, "learning_rate": 8.001387024427042e-06, "epoch": 1.0314960313149966, "total_flos": 984487469209067520, "step": 256400 }, { "loss": 3.775, "learning_rate": 8.000575899031112e-06, "epoch": 1.0318983308591911, "total_flos": 984880246195200000, "step": 256500 }, { "loss": 3.735, "learning_rate": 7.999764773635182e-06, "epoch": 1.0323006304033857, "total_flos": 985257530287718400, "step": 256600 }, { "loss": 3.79, "learning_rate": 7.99895364823925e-06, "epoch": 1.0327029299475803, "total_flos": 985637544358748160, "step": 256700 }, { "loss": 3.7625, "learning_rate": 7.99814252284332e-06, "epoch": 1.033105229491775, "total_flos": 986022917473198080, "step": 256800 }, { "loss": 3.82625, "learning_rate": 7.997331397447388e-06, "epoch": 1.0335075290359697, "total_flos": 986409007605350400, "step": 256900 }, { "loss": 3.795, "learning_rate": 7.996520272051458e-06, "epoch": 1.0339098285801642, "total_flos": 986796829202472960, "step": 257000 }, { "loss": 3.8075, "learning_rate": 7.995709146655528e-06, "epoch": 1.0343121281243588, "total_flos": 987185203168788480, "step": 257100 }, { "loss": 3.84125, "learning_rate": 7.994898021259597e-06, "epoch": 1.0347144276685534, "total_flos": 987569407809945600, "step": 257200 }, { "loss": 3.7275, "learning_rate": 7.994086895863667e-06, "epoch": 1.0351167272127482, "total_flos": 987963268289495040, "step": 257300 }, { "loss": 3.7475, "learning_rate": 7.993275770467737e-06, "epoch": 1.0355190267569427, "total_flos": 988339787563130880, "step": 257400 }, { "loss": 3.8125, "learning_rate": 7.992464645071805e-06, "epoch": 1.0359213263011373, "total_flos": 988728336800440320, "step": 257500 }, { "loss": 3.79875, "learning_rate": 7.991653519675875e-06, "epoch": 1.0363236258453319, "total_flos": 989122824006574080, "step": 257600 }, { "loss": 3.77625, "learning_rate": 7.990842394279944e-06, "epoch": 1.0367259253895265, "total_flos": 989505854863196160, "step": 257700 }, { "loss": 3.76, "learning_rate": 7.990031268884014e-06, "epoch": 1.0371282249337213, "total_flos": 989888821984911360, "step": 257800 }, { "loss": 3.76625, "learning_rate": 7.989220143488084e-06, "epoch": 1.0375305244779158, "total_flos": 990277089726382080, "step": 257900 }, { "loss": 3.78625, "learning_rate": 7.988409018092152e-06, "epoch": 1.0379328240221104, "total_flos": 990667439474810880, "step": 258000 }, { "loss": 3.7625, "learning_rate": 7.987597892696222e-06, "epoch": 1.038335123566305, "total_flos": 991063031419330560, "step": 258100 }, { "loss": 3.78, "learning_rate": 7.986786767300292e-06, "epoch": 1.0387374231104995, "total_flos": 991445132808560640, "step": 258200 }, { "loss": 3.79125, "learning_rate": 7.98597564190436e-06, "epoch": 1.0391397226546943, "total_flos": 991813000068587520, "step": 258300 }, { "loss": 3.79125, "learning_rate": 7.98516451650843e-06, "epoch": 1.039542022198889, "total_flos": 992204003099811840, "step": 258400 }, { "loss": 3.75625, "learning_rate": 7.984353391112499e-06, "epoch": 1.0399443217430835, "total_flos": 992586906486620160, "step": 258500 }, { "loss": 3.8325, "learning_rate": 7.983542265716569e-06, "epoch": 1.040346621287278, "total_flos": 992973379028213760, "step": 258600 }, { "loss": 3.8025, "learning_rate": 7.982731140320639e-06, "epoch": 1.0407489208314726, "total_flos": 993367266063974400, "step": 258700 }, { "loss": 3.7925, "learning_rate": 7.981920014924707e-06, "epoch": 1.0411512203756672, "total_flos": 993762464976568320, "step": 258800 }, { "loss": 3.79125, "learning_rate": 7.981108889528777e-06, "epoch": 1.041553519919862, "total_flos": 994144109598965760, "step": 258900 }, { "loss": 3.78625, "learning_rate": 7.980297764132847e-06, "epoch": 1.0419558194640566, "total_flos": 994537364596899840, "step": 259000 }, { "loss": 3.77125, "learning_rate": 7.979486638736916e-06, "epoch": 1.0423581190082511, "total_flos": 994936860304465920, "step": 259100 }, { "loss": 3.7275, "learning_rate": 7.978675513340986e-06, "epoch": 1.0427604185524457, "total_flos": 995330168414822400, "step": 259200 }, { "loss": 3.765, "learning_rate": 7.977864387945054e-06, "epoch": 1.0431627180966403, "total_flos": 995724698110894080, "step": 259300 }, { "loss": 3.74375, "learning_rate": 7.977053262549124e-06, "epoch": 1.043565017640835, "total_flos": 996112912739942400, "step": 259400 }, { "loss": 3.72625, "learning_rate": 7.976242137153194e-06, "epoch": 1.0439673171850297, "total_flos": 996504542497751040, "step": 259500 }, { "loss": 3.8025, "learning_rate": 7.975431011757263e-06, "epoch": 1.0443696167292242, "total_flos": 996887844227727360, "step": 259600 }, { "loss": 3.76125, "learning_rate": 7.974619886361333e-06, "epoch": 1.0447719162734188, "total_flos": 997256465684152320, "step": 259700 }, { "loss": 3.8, "learning_rate": 7.973808760965403e-06, "epoch": 1.0451742158176134, "total_flos": 997645392019660800, "step": 259800 }, { "loss": 3.7475, "learning_rate": 7.972997635569471e-06, "epoch": 1.0455765153618082, "total_flos": 998033489801379840, "step": 259900 }, { "loss": 3.7725, "learning_rate": 7.972186510173541e-06, "epoch": 1.0459788149060028, "total_flos": 998407321586442240, "step": 260000 }, { "loss": 3.7525, "learning_rate": 7.97137538477761e-06, "epoch": 1.0463811144501973, "total_flos": 998783787747655680, "step": 260100 }, { "loss": 3.7425, "learning_rate": 7.97056425938168e-06, "epoch": 1.046783413994392, "total_flos": 999156796290170880, "step": 260200 }, { "loss": 3.72375, "learning_rate": 7.96975313398575e-06, "epoch": 1.0471857135385865, "total_flos": 999547889612513280, "step": 260300 }, { "loss": 3.74375, "learning_rate": 7.968942008589818e-06, "epoch": 1.0475880130827813, "total_flos": 999926018192547840, "step": 260400 }, { "loss": 3.79, "learning_rate": 7.968130883193888e-06, "epoch": 1.0479903126269758, "total_flos": 1000321371131166720, "step": 260500 }, { "loss": 3.7375, "learning_rate": 7.967319757797958e-06, "epoch": 1.0483926121711704, "total_flos": 1000701103706357760, "step": 260600 }, { "loss": 3.69125, "learning_rate": 7.966508632402026e-06, "epoch": 1.048794911715365, "total_flos": 1001080506984529920, "step": 260700 }, { "loss": 3.79125, "learning_rate": 7.965697507006096e-06, "epoch": 1.0491972112595596, "total_flos": 1001460218314752000, "step": 260800 }, { "loss": 3.73875, "learning_rate": 7.964886381610165e-06, "epoch": 1.0495995108037544, "total_flos": 1001839478189383680, "step": 260900 }, { "loss": 3.775, "learning_rate": 7.964075256214235e-06, "epoch": 1.050001810347949, "total_flos": 1002211318258606080, "step": 261000 }, { "loss": 3.74125, "learning_rate": 7.963264130818305e-06, "epoch": 1.0504041098921435, "total_flos": 1002607112030330880, "step": 261100 }, { "loss": 3.7275, "learning_rate": 7.962453005422373e-06, "epoch": 1.050806409436338, "total_flos": 1002978718404894720, "step": 261200 }, { "loss": 3.71625, "learning_rate": 7.961641880026443e-06, "epoch": 1.0512087089805326, "total_flos": 1003370587168604160, "step": 261300 }, { "loss": 3.78375, "learning_rate": 7.960830754630513e-06, "epoch": 1.0516110085247274, "total_flos": 1003757123445104640, "step": 261400 }, { "loss": 3.78625, "learning_rate": 7.960019629234582e-06, "epoch": 1.052013308068922, "total_flos": 1004134997085511680, "step": 261500 }, { "loss": 3.7475, "learning_rate": 7.959208503838652e-06, "epoch": 1.0524156076131166, "total_flos": 1004525931070586880, "step": 261600 }, { "loss": 3.76, "learning_rate": 7.958397378442722e-06, "epoch": 1.0528179071573112, "total_flos": 1004928597589770240, "step": 261700 }, { "loss": 3.71875, "learning_rate": 7.95758625304679e-06, "epoch": 1.0532202067015057, "total_flos": 1005303385398435840, "step": 261800 }, { "loss": 3.6925, "learning_rate": 7.95677512765086e-06, "epoch": 1.0536225062457005, "total_flos": 1005681933566607360, "step": 261900 }, { "loss": 3.7575, "learning_rate": 7.955964002254928e-06, "epoch": 1.054024805789895, "total_flos": 1006062808058880000, "step": 262000 }, { "loss": 3.7525, "learning_rate": 7.955152876858998e-06, "epoch": 1.0544271053340897, "total_flos": 1006447894366248960, "step": 262100 }, { "loss": 3.70625, "learning_rate": 7.954341751463068e-06, "epoch": 1.0548294048782842, "total_flos": 1006844176772259840, "step": 262200 }, { "loss": 3.78875, "learning_rate": 7.953530626067137e-06, "epoch": 1.0552317044224788, "total_flos": 1007227728130621440, "step": 262300 }, { "loss": 3.78, "learning_rate": 7.952719500671207e-06, "epoch": 1.0556340039666736, "total_flos": 1007623941490483200, "step": 262400 }, { "loss": 3.76, "learning_rate": 7.951908375275277e-06, "epoch": 1.0560363035108682, "total_flos": 1008007917748224000, "step": 262500 }, { "loss": 3.7675, "learning_rate": 7.951097249879345e-06, "epoch": 1.0564386030550628, "total_flos": 1008403323799265280, "step": 262600 }, { "loss": 3.7075, "learning_rate": 7.950286124483415e-06, "epoch": 1.0568409025992573, "total_flos": 1008767154515189760, "step": 262700 }, { "loss": 3.78375, "learning_rate": 7.949474999087484e-06, "epoch": 1.057243202143452, "total_flos": 1009161142464552960, "step": 262800 }, { "loss": 3.72375, "learning_rate": 7.948663873691554e-06, "epoch": 1.0576455016876465, "total_flos": 1009532536389427200, "step": 262900 }, { "loss": 3.77375, "learning_rate": 7.947852748295624e-06, "epoch": 1.0580478012318413, "total_flos": 1009926524338790400, "step": 263000 }, { "loss": 3.7525, "learning_rate": 7.947041622899692e-06, "epoch": 1.0584501007760359, "total_flos": 1010295597250805760, "step": 263100 }, { "loss": 3.8, "learning_rate": 7.946230497503762e-06, "epoch": 1.0588524003202304, "total_flos": 1010670677177794560, "step": 263200 }, { "loss": 3.73375, "learning_rate": 7.945419372107832e-06, "epoch": 1.059254699864425, "total_flos": 1011063926864486400, "step": 263300 }, { "loss": 3.72375, "learning_rate": 7.9446082467119e-06, "epoch": 1.0596569994086196, "total_flos": 1011457261531054080, "step": 263400 }, { "loss": 3.75625, "learning_rate": 7.94379712131597e-06, "epoch": 1.0600592989528144, "total_flos": 1011830790575308800, "step": 263500 }, { "loss": 3.73, "learning_rate": 7.942985995920039e-06, "epoch": 1.060461598497009, "total_flos": 1012200214029312000, "step": 263600 }, { "loss": 3.7475, "learning_rate": 7.942174870524109e-06, "epoch": 1.0608638980412035, "total_flos": 1012580897316864000, "step": 263700 }, { "loss": 3.75125, "learning_rate": 7.941363745128179e-06, "epoch": 1.061266197585398, "total_flos": 1012977726780825600, "step": 263800 }, { "loss": 3.7075, "learning_rate": 7.940552619732247e-06, "epoch": 1.0616684971295927, "total_flos": 1013358426002104320, "step": 263900 }, { "loss": 3.71375, "learning_rate": 7.939741494336317e-06, "epoch": 1.0620707966737875, "total_flos": 1013742412882329600, "step": 264000 }, { "loss": 3.78375, "learning_rate": 7.938930368940387e-06, "epoch": 1.062473096217982, "total_flos": 1014117529988014080, "step": 264100 }, { "loss": 3.77125, "learning_rate": 7.938119243544456e-06, "epoch": 1.0628753957621766, "total_flos": 1014495435495874560, "step": 264200 }, { "loss": 3.71625, "learning_rate": 7.937308118148526e-06, "epoch": 1.0632776953063712, "total_flos": 1014897565579591680, "step": 264300 }, { "loss": 3.6875, "learning_rate": 7.936496992752594e-06, "epoch": 1.0636799948505657, "total_flos": 1015286098883174400, "step": 264400 }, { "loss": 3.79875, "learning_rate": 7.935685867356666e-06, "epoch": 1.0640822943947605, "total_flos": 1015660026270597120, "step": 264500 }, { "loss": 3.72625, "learning_rate": 7.934874741960734e-06, "epoch": 1.0644845939389551, "total_flos": 1016040837027962880, "step": 264600 }, { "loss": 3.72625, "learning_rate": 7.934063616564803e-06, "epoch": 1.0648868934831497, "total_flos": 1016439366089441280, "step": 264700 }, { "loss": 3.68125, "learning_rate": 7.933252491168873e-06, "epoch": 1.0652891930273443, "total_flos": 1016808338087854080, "step": 264800 }, { "loss": 3.69125, "learning_rate": 7.932441365772943e-06, "epoch": 1.0656914925715388, "total_flos": 1017187284599193600, "step": 264900 }, { "loss": 3.76875, "learning_rate": 7.931630240377011e-06, "epoch": 1.0660937921157336, "total_flos": 1017566640076185600, "step": 265000 }, { "loss": 3.71125, "learning_rate": 7.930819114981081e-06, "epoch": 1.0664960916599282, "total_flos": 1017944534961561600, "step": 265100 }, { "loss": 3.69125, "learning_rate": 7.93000798958515e-06, "epoch": 1.0668983912041228, "total_flos": 1018334496989306880, "step": 265200 }, { "loss": 3.75125, "learning_rate": 7.929196864189221e-06, "epoch": 1.0673006907483173, "total_flos": 1018703686748651520, "step": 265300 }, { "loss": 3.79375, "learning_rate": 7.92838573879329e-06, "epoch": 1.067702990292512, "total_flos": 1019090031820431360, "step": 265400 }, { "loss": 3.74, "learning_rate": 7.927574613397358e-06, "epoch": 1.0681052898367067, "total_flos": 1019492629293465600, "step": 265500 }, { "loss": 3.7275, "learning_rate": 7.926763488001428e-06, "epoch": 1.0685075893809013, "total_flos": 1019870024922071040, "step": 265600 }, { "loss": 3.735, "learning_rate": 7.925952362605498e-06, "epoch": 1.0689098889250959, "total_flos": 1020243113133219840, "step": 265700 }, { "loss": 3.7525, "learning_rate": 7.925141237209566e-06, "epoch": 1.0693121884692904, "total_flos": 1020608191991070720, "step": 265800 }, { "loss": 3.7575, "learning_rate": 7.924330111813636e-06, "epoch": 1.069714488013485, "total_flos": 1020985343302533120, "step": 265900 }, { "loss": 3.72, "learning_rate": 7.923518986417705e-06, "epoch": 1.0701167875576796, "total_flos": 1021384504401838080, "step": 266000 }, { "loss": 3.72875, "learning_rate": 7.922707861021776e-06, "epoch": 1.0705190871018744, "total_flos": 1021765447940259840, "step": 266100 }, { "loss": 3.71375, "learning_rate": 7.921896735625845e-06, "epoch": 1.070921386646069, "total_flos": 1022152653433282560, "step": 266200 }, { "loss": 3.74875, "learning_rate": 7.921085610229913e-06, "epoch": 1.0713236861902635, "total_flos": 1022548479072460800, "step": 266300 }, { "loss": 3.7, "learning_rate": 7.920274484833985e-06, "epoch": 1.071725985734458, "total_flos": 1022933257327779840, "step": 266400 }, { "loss": 3.685, "learning_rate": 7.919463359438053e-06, "epoch": 1.072128285278653, "total_flos": 1023313621940797440, "step": 266500 }, { "loss": 3.71875, "learning_rate": 7.918652234042122e-06, "epoch": 1.0725305848228475, "total_flos": 1023699361530961920, "step": 266600 }, { "loss": 3.71625, "learning_rate": 7.917841108646192e-06, "epoch": 1.072932884367042, "total_flos": 1024066023139000320, "step": 266700 }, { "loss": 3.71875, "learning_rate": 7.917029983250262e-06, "epoch": 1.0733351839112366, "total_flos": 1024465885322280960, "step": 266800 }, { "loss": 3.725, "learning_rate": 7.916218857854332e-06, "epoch": 1.0737374834554312, "total_flos": 1024855135643566080, "step": 266900 }, { "loss": 3.68375, "learning_rate": 7.9154077324584e-06, "epoch": 1.0741397829996258, "total_flos": 1025242978485657600, "step": 267000 }, { "loss": 3.7325, "learning_rate": 7.914596607062468e-06, "epoch": 1.0745420825438206, "total_flos": 1025638257066885120, "step": 267100 }, { "loss": 3.71125, "learning_rate": 7.91378548166654e-06, "epoch": 1.0749443820880151, "total_flos": 1026018807573381120, "step": 267200 }, { "loss": 3.7475, "learning_rate": 7.912974356270608e-06, "epoch": 1.0753466816322097, "total_flos": 1026407117804789760, "step": 267300 }, { "loss": 3.7525, "learning_rate": 7.912163230874678e-06, "epoch": 1.0757489811764043, "total_flos": 1026813109161615360, "step": 267400 }, { "loss": 3.76375, "learning_rate": 7.911352105478747e-06, "epoch": 1.0761512807205988, "total_flos": 1027194907810037760, "step": 267500 }, { "loss": 3.69125, "learning_rate": 7.910540980082817e-06, "epoch": 1.0765535802647936, "total_flos": 1027577731528212480, "step": 267600 }, { "loss": 3.73125, "learning_rate": 7.909729854686887e-06, "epoch": 1.0769558798089882, "total_flos": 1027955992889303040, "step": 267700 }, { "loss": 3.745, "learning_rate": 7.908918729290955e-06, "epoch": 1.0773581793531828, "total_flos": 1028323913261752320, "step": 267800 }, { "loss": 3.71125, "learning_rate": 7.908107603895024e-06, "epoch": 1.0777604788973774, "total_flos": 1028709567872040960, "step": 267900 }, { "loss": 3.73875, "learning_rate": 7.907296478499095e-06, "epoch": 1.078162778441572, "total_flos": 1029099184669040640, "step": 268000 }, { "loss": 3.6925, "learning_rate": 7.906485353103164e-06, "epoch": 1.0785650779857667, "total_flos": 1029477589433671680, "step": 268100 }, { "loss": 3.7725, "learning_rate": 7.905674227707234e-06, "epoch": 1.0789673775299613, "total_flos": 1029849185185751040, "step": 268200 }, { "loss": 3.73875, "learning_rate": 7.904863102311302e-06, "epoch": 1.0793696770741559, "total_flos": 1030239630536540160, "step": 268300 }, { "loss": 3.70875, "learning_rate": 7.904051976915372e-06, "epoch": 1.0797719766183504, "total_flos": 1030622847286640640, "step": 268400 }, { "loss": 3.7, "learning_rate": 7.903240851519442e-06, "epoch": 1.080174276162545, "total_flos": 1031016335979233280, "step": 268500 }, { "loss": 3.74875, "learning_rate": 7.90242972612351e-06, "epoch": 1.0805765757067398, "total_flos": 1031404210688778240, "step": 268600 }, { "loss": 3.6675, "learning_rate": 7.901618600727579e-06, "epoch": 1.0809788752509344, "total_flos": 1031779380906885120, "step": 268700 }, { "loss": 3.73375, "learning_rate": 7.90080747533165e-06, "epoch": 1.081381174795129, "total_flos": 1032169751900282880, "step": 268800 }, { "loss": 3.73375, "learning_rate": 7.899996349935719e-06, "epoch": 1.0817834743393235, "total_flos": 1032566305179648000, "step": 268900 }, { "loss": 3.7225, "learning_rate": 7.899185224539789e-06, "epoch": 1.082185773883518, "total_flos": 1032945533186826240, "step": 269000 }, { "loss": 3.7, "learning_rate": 7.898374099143857e-06, "epoch": 1.082588073427713, "total_flos": 1033324878041333760, "step": 269100 }, { "loss": 3.725, "learning_rate": 7.897562973747927e-06, "epoch": 1.0829903729719075, "total_flos": 1033703442143232000, "step": 269200 }, { "loss": 3.7625, "learning_rate": 7.896751848351997e-06, "epoch": 1.083392672516102, "total_flos": 1034090127134515200, "step": 269300 }, { "loss": 3.7375, "learning_rate": 7.895940722956066e-06, "epoch": 1.0837949720602966, "total_flos": 1034479658951639040, "step": 269400 }, { "loss": 3.71625, "learning_rate": 7.895129597560136e-06, "epoch": 1.0841972716044912, "total_flos": 1034845651343155200, "step": 269500 }, { "loss": 3.67125, "learning_rate": 7.894318472164206e-06, "epoch": 1.084599571148686, "total_flos": 1035230769517977600, "step": 269600 }, { "loss": 3.64125, "learning_rate": 7.893507346768274e-06, "epoch": 1.0850018706928806, "total_flos": 1035598429639557120, "step": 269700 }, { "loss": 3.67375, "learning_rate": 7.892696221372344e-06, "epoch": 1.0854041702370751, "total_flos": 1035973350229278720, "step": 269800 }, { "loss": 3.7875, "learning_rate": 7.891885095976413e-06, "epoch": 1.0858064697812697, "total_flos": 1036349067505336320, "step": 269900 }, { "loss": 3.76625, "learning_rate": 7.891073970580483e-06, "epoch": 1.0862087693254643, "total_flos": 1036730876776243200, "step": 270000 }, { "loss": 3.67125, "learning_rate": 7.890262845184553e-06, "epoch": 1.0866110688696589, "total_flos": 1037098048263536640, "step": 270100 }, { "loss": 3.6975, "learning_rate": 7.889451719788621e-06, "epoch": 1.0870133684138537, "total_flos": 1037492466423521280, "step": 270200 }, { "loss": 3.69125, "learning_rate": 7.888640594392691e-06, "epoch": 1.0874156679580482, "total_flos": 1037877876716666880, "step": 270300 }, { "loss": 3.71875, "learning_rate": 7.887829468996761e-06, "epoch": 1.0878179675022428, "total_flos": 1038259085817200640, "step": 270400 }, { "loss": 3.76875, "learning_rate": 7.88701834360083e-06, "epoch": 1.0882202670464374, "total_flos": 1038648893818920960, "step": 270500 }, { "loss": 3.70375, "learning_rate": 7.8862072182049e-06, "epoch": 1.0886225665906322, "total_flos": 1039046344698224640, "step": 270600 }, { "loss": 3.7075, "learning_rate": 7.885396092808968e-06, "epoch": 1.0890248661348267, "total_flos": 1039432036487208960, "step": 270700 }, { "loss": 3.66375, "learning_rate": 7.884584967413038e-06, "epoch": 1.0894271656790213, "total_flos": 1039817717653708800, "step": 270800 }, { "loss": 3.65875, "learning_rate": 7.883773842017108e-06, "epoch": 1.0898294652232159, "total_flos": 1040190614660136960, "step": 270900 }, { "loss": 3.7425, "learning_rate": 7.882962716621176e-06, "epoch": 1.0902317647674105, "total_flos": 1040590025387827200, "step": 271000 }, { "loss": 3.71375, "learning_rate": 7.882151591225246e-06, "epoch": 1.090634064311605, "total_flos": 1040987146970112000, "step": 271100 }, { "loss": 3.675, "learning_rate": 7.881340465829316e-06, "epoch": 1.0910363638557998, "total_flos": 1041380768443760640, "step": 271200 }, { "loss": 3.72625, "learning_rate": 7.880529340433385e-06, "epoch": 1.0914386633999944, "total_flos": 1041764144531128320, "step": 271300 }, { "loss": 3.6975, "learning_rate": 7.879718215037455e-06, "epoch": 1.091840962944189, "total_flos": 1042158992901734400, "step": 271400 }, { "loss": 3.735, "learning_rate": 7.878907089641525e-06, "epoch": 1.0922432624883835, "total_flos": 1042564724007690240, "step": 271500 }, { "loss": 3.7075, "learning_rate": 7.878095964245593e-06, "epoch": 1.0926455620325781, "total_flos": 1042969063568179200, "step": 271600 }, { "loss": 3.6825, "learning_rate": 7.877284838849663e-06, "epoch": 1.093047861576773, "total_flos": 1043364469619220480, "step": 271700 }, { "loss": 3.73375, "learning_rate": 7.876473713453732e-06, "epoch": 1.0934501611209675, "total_flos": 1043752105322864640, "step": 271800 }, { "loss": 3.6975, "learning_rate": 7.875662588057802e-06, "epoch": 1.093852460665162, "total_flos": 1044126181425070080, "step": 271900 }, { "loss": 3.67125, "learning_rate": 7.874851462661872e-06, "epoch": 1.0942547602093566, "total_flos": 1044513875552378880, "step": 272000 }, { "loss": 3.73875, "learning_rate": 7.87404033726594e-06, "epoch": 1.0946570597535512, "total_flos": 1044893969292042240, "step": 272100 }, { "loss": 3.69625, "learning_rate": 7.87322921187001e-06, "epoch": 1.095059359297746, "total_flos": 1045290511948922880, "step": 272200 }, { "loss": 3.7075, "learning_rate": 7.87241808647408e-06, "epoch": 1.0954616588419406, "total_flos": 1045670887184424960, "step": 272300 }, { "loss": 3.7625, "learning_rate": 7.871606961078148e-06, "epoch": 1.0958639583861352, "total_flos": 1046057901472727040, "step": 272400 }, { "loss": 3.67, "learning_rate": 7.870795835682218e-06, "epoch": 1.0962662579303297, "total_flos": 1046425726242816000, "step": 272500 }, { "loss": 3.745, "learning_rate": 7.869984710286287e-06, "epoch": 1.0966685574745243, "total_flos": 1046823575465287680, "step": 272600 }, { "loss": 3.69125, "learning_rate": 7.869173584890357e-06, "epoch": 1.097070857018719, "total_flos": 1047200620551905280, "step": 272700 }, { "loss": 3.7025, "learning_rate": 7.868362459494427e-06, "epoch": 1.0974731565629137, "total_flos": 1047591666073067520, "step": 272800 }, { "loss": 3.65375, "learning_rate": 7.867551334098495e-06, "epoch": 1.0978754561071082, "total_flos": 1047978414799257600, "step": 272900 }, { "loss": 3.6825, "learning_rate": 7.866740208702565e-06, "epoch": 1.0982777556513028, "total_flos": 1048372535529676800, "step": 273000 }, { "loss": 3.7175, "learning_rate": 7.865929083306635e-06, "epoch": 1.0986800551954974, "total_flos": 1048757005732945920, "step": 273100 }, { "loss": 3.68375, "learning_rate": 7.865117957910704e-06, "epoch": 1.0990823547396922, "total_flos": 1049143701346713600, "step": 273200 }, { "loss": 3.69125, "learning_rate": 7.864306832514774e-06, "epoch": 1.0994846542838868, "total_flos": 1049536010943528960, "step": 273300 }, { "loss": 3.655, "learning_rate": 7.863495707118842e-06, "epoch": 1.0998869538280813, "total_flos": 1049918404451082240, "step": 273400 }, { "loss": 3.745, "learning_rate": 7.862684581722912e-06, "epoch": 1.100289253372276, "total_flos": 1050297951132794880, "step": 273500 }, { "loss": 3.70125, "learning_rate": 7.861873456326982e-06, "epoch": 1.1006915529164705, "total_flos": 1050685002599792640, "step": 273600 }, { "loss": 3.65625, "learning_rate": 7.86106233093105e-06, "epoch": 1.1010938524606653, "total_flos": 1051057952718643200, "step": 273700 }, { "loss": 3.65625, "learning_rate": 7.86025120553512e-06, "epoch": 1.1014961520048598, "total_flos": 1051439183064145920, "step": 273800 }, { "loss": 3.6975, "learning_rate": 7.85944008013919e-06, "epoch": 1.1018984515490544, "total_flos": 1051830403856302080, "step": 273900 }, { "loss": 3.70875, "learning_rate": 7.858628954743259e-06, "epoch": 1.102300751093249, "total_flos": 1052213833056092160, "step": 274000 }, { "loss": 3.70875, "learning_rate": 7.857817829347329e-06, "epoch": 1.1027030506374436, "total_flos": 1052591552670474240, "step": 274100 }, { "loss": 3.69125, "learning_rate": 7.857006703951397e-06, "epoch": 1.1031053501816381, "total_flos": 1052972687413616640, "step": 274200 }, { "loss": 3.755, "learning_rate": 7.856195578555467e-06, "epoch": 1.103507649725833, "total_flos": 1053357141683159040, "step": 274300 }, { "loss": 3.72, "learning_rate": 7.855384453159537e-06, "epoch": 1.1039099492700275, "total_flos": 1053727292777349120, "step": 274400 }, { "loss": 3.73875, "learning_rate": 7.854573327763606e-06, "epoch": 1.104312248814222, "total_flos": 1054111545219686400, "step": 274500 }, { "loss": 3.6175, "learning_rate": 7.853762202367676e-06, "epoch": 1.1047145483584166, "total_flos": 1054498543574261760, "step": 274600 }, { "loss": 3.68375, "learning_rate": 7.852951076971746e-06, "epoch": 1.1051168479026114, "total_flos": 1054890454827909120, "step": 274700 }, { "loss": 3.72875, "learning_rate": 7.852139951575814e-06, "epoch": 1.105519147446806, "total_flos": 1055272333144965120, "step": 274800 }, { "loss": 3.69375, "learning_rate": 7.851328826179884e-06, "epoch": 1.1059214469910006, "total_flos": 1055647891083755520, "step": 274900 }, { "loss": 3.65875, "learning_rate": 7.850517700783953e-06, "epoch": 1.1063237465351952, "total_flos": 1056039589887713280, "step": 275000 }, { "loss": 3.6725, "learning_rate": 7.849706575388023e-06, "epoch": 1.1067260460793897, "total_flos": 1056420342221414400, "step": 275100 }, { "loss": 3.735, "learning_rate": 7.848895449992093e-06, "epoch": 1.1071283456235843, "total_flos": 1056802496723066880, "step": 275200 }, { "loss": 3.695, "learning_rate": 7.848084324596161e-06, "epoch": 1.107530645167779, "total_flos": 1057187322779566080, "step": 275300 }, { "loss": 3.70125, "learning_rate": 7.847273199200231e-06, "epoch": 1.1079329447119737, "total_flos": 1057576360651161600, "step": 275400 }, { "loss": 3.68125, "learning_rate": 7.846462073804301e-06, "epoch": 1.1083352442561683, "total_flos": 1057967480529715200, "step": 275500 }, { "loss": 3.69375, "learning_rate": 7.84565094840837e-06, "epoch": 1.1087375438003628, "total_flos": 1058349093284659200, "step": 275600 }, { "loss": 3.71125, "learning_rate": 7.84483982301244e-06, "epoch": 1.1091398433445574, "total_flos": 1058733520997990400, "step": 275700 }, { "loss": 3.6775, "learning_rate": 7.84402869761651e-06, "epoch": 1.1095421428887522, "total_flos": 1059117178581196800, "step": 275800 }, { "loss": 3.6975, "learning_rate": 7.843217572220578e-06, "epoch": 1.1099444424329468, "total_flos": 1059511952594411520, "step": 275900 }, { "loss": 3.69375, "learning_rate": 7.842406446824648e-06, "epoch": 1.1103467419771413, "total_flos": 1059895817316065280, "step": 276000 }, { "loss": 3.675, "learning_rate": 7.841595321428716e-06, "epoch": 1.110749041521336, "total_flos": 1060276325332623360, "step": 276100 }, { "loss": 3.69875, "learning_rate": 7.840784196032786e-06, "epoch": 1.1111513410655305, "total_flos": 1060650130561474560, "step": 276200 }, { "loss": 3.69875, "learning_rate": 7.839973070636856e-06, "epoch": 1.1115536406097253, "total_flos": 1061029794090516480, "step": 276300 }, { "loss": 3.70875, "learning_rate": 7.839161945240925e-06, "epoch": 1.1119559401539199, "total_flos": 1061429178261995520, "step": 276400 }, { "loss": 3.6675, "learning_rate": 7.838350819844995e-06, "epoch": 1.1123582396981144, "total_flos": 1061829799952916480, "step": 276500 }, { "loss": 3.68375, "learning_rate": 7.837539694449065e-06, "epoch": 1.112760539242309, "total_flos": 1062223729478615040, "step": 276600 }, { "loss": 3.66875, "learning_rate": 7.836728569053133e-06, "epoch": 1.1131628387865036, "total_flos": 1062609819610767360, "step": 276700 }, { "loss": 3.70875, "learning_rate": 7.835917443657203e-06, "epoch": 1.1135651383306984, "total_flos": 1063003961586155520, "step": 276800 }, { "loss": 3.695, "learning_rate": 7.835106318261272e-06, "epoch": 1.113967437874893, "total_flos": 1063372306857984000, "step": 276900 }, { "loss": 3.71875, "learning_rate": 7.834295192865342e-06, "epoch": 1.1143697374190875, "total_flos": 1063750318590689280, "step": 277000 }, { "loss": 3.73625, "learning_rate": 7.833484067469412e-06, "epoch": 1.114772036963282, "total_flos": 1064116279114752000, "step": 277100 }, { "loss": 3.72375, "learning_rate": 7.83267294207348e-06, "epoch": 1.1151743365074767, "total_flos": 1064516497151262720, "step": 277200 }, { "loss": 3.7125, "learning_rate": 7.83186181667755e-06, "epoch": 1.1155766360516715, "total_flos": 1064906299841740800, "step": 277300 }, { "loss": 3.69625, "learning_rate": 7.83105069128162e-06, "epoch": 1.115978935595866, "total_flos": 1065290716932587520, "step": 277400 }, { "loss": 3.70125, "learning_rate": 7.830239565885688e-06, "epoch": 1.1163812351400606, "total_flos": 1065681672162631680, "step": 277500 }, { "loss": 3.64375, "learning_rate": 7.829428440489758e-06, "epoch": 1.1167835346842552, "total_flos": 1066066232657018880, "step": 277600 }, { "loss": 3.69, "learning_rate": 7.828617315093827e-06, "epoch": 1.1171858342284497, "total_flos": 1066449773392896000, "step": 277700 }, { "loss": 3.7125, "learning_rate": 7.827806189697897e-06, "epoch": 1.1175881337726445, "total_flos": 1066844032215613440, "step": 277800 }, { "loss": 3.73, "learning_rate": 7.826995064301967e-06, "epoch": 1.1179904333168391, "total_flos": 1067223355825152000, "step": 277900 }, { "loss": 3.715, "learning_rate": 7.826183938906035e-06, "epoch": 1.1183927328610337, "total_flos": 1067617588091658240, "step": 278000 }, { "loss": 3.70875, "learning_rate": 7.825372813510105e-06, "epoch": 1.1187950324052283, "total_flos": 1067992200629329920, "step": 278100 }, { "loss": 3.61625, "learning_rate": 7.824561688114175e-06, "epoch": 1.1191973319494228, "total_flos": 1068375661696573440, "step": 278200 }, { "loss": 3.7175, "learning_rate": 7.823750562718244e-06, "epoch": 1.1195996314936174, "total_flos": 1068766032689971200, "step": 278300 }, { "loss": 3.6675, "learning_rate": 7.822939437322314e-06, "epoch": 1.1200019310378122, "total_flos": 1069139970699878400, "step": 278400 }, { "loss": 3.68375, "learning_rate": 7.822128311926382e-06, "epoch": 1.1204042305820068, "total_flos": 1069539694790860800, "step": 278500 }, { "loss": 3.65, "learning_rate": 7.821317186530452e-06, "epoch": 1.1208065301262014, "total_flos": 1069923246149222400, "step": 278600 }, { "loss": 3.66625, "learning_rate": 7.820506061134522e-06, "epoch": 1.121208829670396, "total_flos": 1070289668751360000, "step": 278700 }, { "loss": 3.6525, "learning_rate": 7.81969493573859e-06, "epoch": 1.1216111292145905, "total_flos": 1070664897393131520, "step": 278800 }, { "loss": 3.6725, "learning_rate": 7.81888381034266e-06, "epoch": 1.1220134287587853, "total_flos": 1071037210162913280, "step": 278900 }, { "loss": 3.72125, "learning_rate": 7.81807268494673e-06, "epoch": 1.1224157283029799, "total_flos": 1071416167296737280, "step": 279000 }, { "loss": 3.6175, "learning_rate": 7.817261559550799e-06, "epoch": 1.1228180278471744, "total_flos": 1071797302039879680, "step": 279100 }, { "loss": 3.62875, "learning_rate": 7.816450434154869e-06, "epoch": 1.123220327391369, "total_flos": 1072179860195942400, "step": 279200 }, { "loss": 3.6425, "learning_rate": 7.815639308758937e-06, "epoch": 1.1236226269355636, "total_flos": 1072550213117337600, "step": 279300 }, { "loss": 3.705, "learning_rate": 7.814828183363007e-06, "epoch": 1.1240249264797584, "total_flos": 1072929802288988160, "step": 279400 }, { "loss": 3.6975, "learning_rate": 7.814017057967077e-06, "epoch": 1.124427226023953, "total_flos": 1073329818498293760, "step": 279500 }, { "loss": 3.665, "learning_rate": 7.813205932571146e-06, "epoch": 1.1248295255681475, "total_flos": 1073719095375790080, "step": 279600 }, { "loss": 3.7075, "learning_rate": 7.812394807175216e-06, "epoch": 1.125231825112342, "total_flos": 1074089315516129280, "step": 279700 }, { "loss": 3.70875, "learning_rate": 7.811583681779286e-06, "epoch": 1.1256341246565367, "total_flos": 1074471246945607680, "step": 279800 }, { "loss": 3.67875, "learning_rate": 7.810772556383354e-06, "epoch": 1.1260364242007315, "total_flos": 1074854139709931520, "step": 279900 }, { "loss": 3.66875, "learning_rate": 7.809961430987424e-06, "epoch": 1.126438723744926, "total_flos": 1075238137212641280, "step": 280000 }, { "loss": 3.64125, "learning_rate": 7.809150305591493e-06, "epoch": 1.1268410232891206, "total_flos": 1075624232656035840, "step": 280100 }, { "loss": 3.65375, "learning_rate": 7.808339180195563e-06, "epoch": 1.1272433228333152, "total_flos": 1076008516965826560, "step": 280200 }, { "loss": 3.63375, "learning_rate": 7.807528054799633e-06, "epoch": 1.1276456223775098, "total_flos": 1076376304557219840, "step": 280300 }, { "loss": 3.66625, "learning_rate": 7.806716929403701e-06, "epoch": 1.1280479219217046, "total_flos": 1076758496237568000, "step": 280400 }, { "loss": 3.6475, "learning_rate": 7.805905804007771e-06, "epoch": 1.1284502214658991, "total_flos": 1077144862554316800, "step": 280500 }, { "loss": 3.7075, "learning_rate": 7.805094678611841e-06, "epoch": 1.1288525210100937, "total_flos": 1077517669269626880, "step": 280600 }, { "loss": 3.64375, "learning_rate": 7.80428355321591e-06, "epoch": 1.1292548205542883, "total_flos": 1077892382720901120, "step": 280700 }, { "loss": 3.71625, "learning_rate": 7.80347242781998e-06, "epoch": 1.1296571200984828, "total_flos": 1078270298851246080, "step": 280800 }, { "loss": 3.66875, "learning_rate": 7.80266130242405e-06, "epoch": 1.1300594196426776, "total_flos": 1078665104731914240, "step": 280900 }, { "loss": 3.67, "learning_rate": 7.801850177028118e-06, "epoch": 1.1304617191868722, "total_flos": 1079050525647544320, "step": 281000 }, { "loss": 3.67, "learning_rate": 7.801039051632188e-06, "epoch": 1.1308640187310668, "total_flos": 1079448024328028160, "step": 281100 }, { "loss": 3.7175, "learning_rate": 7.800227926236256e-06, "epoch": 1.1312663182752614, "total_flos": 1079856464167526400, "step": 281200 }, { "loss": 3.64125, "learning_rate": 7.799416800840326e-06, "epoch": 1.131668617819456, "total_flos": 1080251997688381440, "step": 281300 }, { "loss": 3.6975, "learning_rate": 7.798605675444396e-06, "epoch": 1.1320709173636505, "total_flos": 1080621654837043200, "step": 281400 }, { "loss": 3.73625, "learning_rate": 7.797794550048465e-06, "epoch": 1.1324732169078453, "total_flos": 1080999390385152000, "step": 281500 }, { "loss": 3.65, "learning_rate": 7.796983424652535e-06, "epoch": 1.1328755164520399, "total_flos": 1081386325004820480, "step": 281600 }, { "loss": 3.70625, "learning_rate": 7.796172299256605e-06, "epoch": 1.1332778159962344, "total_flos": 1081762982370754560, "step": 281700 }, { "loss": 3.6725, "learning_rate": 7.795361173860673e-06, "epoch": 1.133680115540429, "total_flos": 1082149083125391360, "step": 281800 }, { "loss": 3.69, "learning_rate": 7.794550048464743e-06, "epoch": 1.1340824150846238, "total_flos": 1082538126308229120, "step": 281900 }, { "loss": 3.65125, "learning_rate": 7.793738923068812e-06, "epoch": 1.1344847146288184, "total_flos": 1082911920914595840, "step": 282000 }, { "loss": 3.69, "learning_rate": 7.792927797672882e-06, "epoch": 1.134887014173013, "total_flos": 1083299158275072000, "step": 282100 }, { "loss": 3.68875, "learning_rate": 7.792116672276952e-06, "epoch": 1.1352893137172075, "total_flos": 1083683479763558400, "step": 282200 }, { "loss": 3.6975, "learning_rate": 7.79130554688102e-06, "epoch": 1.135691613261402, "total_flos": 1084066808049745920, "step": 282300 }, { "loss": 3.705, "learning_rate": 7.79049442148509e-06, "epoch": 1.1360939128055967, "total_flos": 1084452526394941440, "step": 282400 }, { "loss": 3.64375, "learning_rate": 7.78968329608916e-06, "epoch": 1.1364962123497915, "total_flos": 1084839163585044480, "step": 282500 }, { "loss": 3.66625, "learning_rate": 7.788872170693228e-06, "epoch": 1.136898511893986, "total_flos": 1085224637613096960, "step": 282600 }, { "loss": 3.655, "learning_rate": 7.788061045297298e-06, "epoch": 1.1373008114381806, "total_flos": 1085599414799278080, "step": 282700 }, { "loss": 3.61625, "learning_rate": 7.787249919901367e-06, "epoch": 1.1377031109823752, "total_flos": 1085982493457080320, "step": 282800 }, { "loss": 3.69625, "learning_rate": 7.786438794505437e-06, "epoch": 1.13810541052657, "total_flos": 1086356314619658240, "step": 282900 }, { "loss": 3.6525, "learning_rate": 7.785627669109507e-06, "epoch": 1.1385077100707646, "total_flos": 1086757509924741120, "step": 283000 }, { "loss": 3.67625, "learning_rate": 7.784816543713575e-06, "epoch": 1.1389100096149591, "total_flos": 1087136084649123840, "step": 283100 }, { "loss": 3.6675, "learning_rate": 7.784005418317645e-06, "epoch": 1.1393123091591537, "total_flos": 1087529881393766400, "step": 283200 }, { "loss": 3.63875, "learning_rate": 7.783194292921715e-06, "epoch": 1.1397146087033483, "total_flos": 1087914585291694080, "step": 283300 }, { "loss": 3.6175, "learning_rate": 7.782383167525784e-06, "epoch": 1.1401169082475429, "total_flos": 1088294514382848000, "step": 283400 }, { "loss": 3.64375, "learning_rate": 7.781572042129854e-06, "epoch": 1.1405192077917377, "total_flos": 1088679489154129920, "step": 283500 }, { "loss": 3.69625, "learning_rate": 7.780760916733922e-06, "epoch": 1.1409215073359322, "total_flos": 1089060257421557760, "step": 283600 }, { "loss": 3.7525, "learning_rate": 7.779949791337992e-06, "epoch": 1.1413238068801268, "total_flos": 1089430551919288320, "step": 283700 }, { "loss": 3.6275, "learning_rate": 7.779138665942062e-06, "epoch": 1.1417261064243214, "total_flos": 1089803193986088960, "step": 283800 }, { "loss": 3.6125, "learning_rate": 7.77832754054613e-06, "epoch": 1.142128405968516, "total_flos": 1090194818432655360, "step": 283900 }, { "loss": 3.63125, "learning_rate": 7.7775164151502e-06, "epoch": 1.1425307055127107, "total_flos": 1090580998855925760, "step": 284000 }, { "loss": 3.66625, "learning_rate": 7.77670528975427e-06, "epoch": 1.1429330050569053, "total_flos": 1090971306114416640, "step": 284100 }, { "loss": 3.65875, "learning_rate": 7.775894164358339e-06, "epoch": 1.1433353046010999, "total_flos": 1091337983656181760, "step": 284200 }, { "loss": 3.66375, "learning_rate": 7.775083038962409e-06, "epoch": 1.1437376041452945, "total_flos": 1091718332335472640, "step": 284300 }, { "loss": 3.6375, "learning_rate": 7.774271913566477e-06, "epoch": 1.144139903689489, "total_flos": 1092114827191173120, "step": 284400 }, { "loss": 3.6275, "learning_rate": 7.773460788170547e-06, "epoch": 1.1445422032336838, "total_flos": 1092496965759098880, "step": 284500 }, { "loss": 3.6825, "learning_rate": 7.772649662774617e-06, "epoch": 1.1449445027778784, "total_flos": 1092877213524787200, "step": 284600 }, { "loss": 3.635, "learning_rate": 7.771838537378686e-06, "epoch": 1.145346802322073, "total_flos": 1093259989441781760, "step": 284700 }, { "loss": 3.70375, "learning_rate": 7.771027411982756e-06, "epoch": 1.1457491018662675, "total_flos": 1093653749007728640, "step": 284800 }, { "loss": 3.69625, "learning_rate": 7.770216286586826e-06, "epoch": 1.1461514014104621, "total_flos": 1094041145705472000, "step": 284900 }, { "loss": 3.715, "learning_rate": 7.769405161190894e-06, "epoch": 1.146553700954657, "total_flos": 1094420272799047680, "step": 285000 }, { "loss": 3.69125, "learning_rate": 7.768594035794964e-06, "epoch": 1.1469560004988515, "total_flos": 1094798682874920960, "step": 285100 }, { "loss": 3.65, "learning_rate": 7.767782910399034e-06, "epoch": 1.147358300043046, "total_flos": 1095172355322716160, "step": 285200 }, { "loss": 3.675, "learning_rate": 7.766971785003103e-06, "epoch": 1.1477605995872406, "total_flos": 1095554870988840960, "step": 285300 }, { "loss": 3.62625, "learning_rate": 7.766160659607173e-06, "epoch": 1.1481628991314352, "total_flos": 1095948333125222400, "step": 285400 }, { "loss": 3.6325, "learning_rate": 7.765349534211241e-06, "epoch": 1.1485651986756298, "total_flos": 1096321431958855680, "step": 285500 }, { "loss": 3.64375, "learning_rate": 7.764538408815313e-06, "epoch": 1.1489674982198246, "total_flos": 1096701111421624320, "step": 285600 }, { "loss": 3.59, "learning_rate": 7.763727283419381e-06, "epoch": 1.1493697977640192, "total_flos": 1097088375338311680, "step": 285700 }, { "loss": 3.655, "learning_rate": 7.76291615802345e-06, "epoch": 1.1497720973082137, "total_flos": 1097470205854187520, "step": 285800 }, { "loss": 3.6275, "learning_rate": 7.76210503262752e-06, "epoch": 1.1501743968524083, "total_flos": 1097850442997391360, "step": 285900 }, { "loss": 3.59875, "learning_rate": 7.76129390723159e-06, "epoch": 1.150576696396603, "total_flos": 1098235800178114560, "step": 286000 }, { "loss": 3.6725, "learning_rate": 7.760482781835658e-06, "epoch": 1.1509789959407977, "total_flos": 1098632364079964160, "step": 286100 }, { "loss": 3.69625, "learning_rate": 7.759671656439728e-06, "epoch": 1.1513812954849922, "total_flos": 1099036098158837760, "step": 286200 }, { "loss": 3.6075, "learning_rate": 7.758860531043796e-06, "epoch": 1.1517835950291868, "total_flos": 1099417275391918080, "step": 286300 }, { "loss": 3.63625, "learning_rate": 7.758049405647868e-06, "epoch": 1.1521858945733814, "total_flos": 1099806339819724800, "step": 286400 }, { "loss": 3.5675, "learning_rate": 7.757238280251936e-06, "epoch": 1.152588194117576, "total_flos": 1100182354525347840, "step": 286500 }, { "loss": 3.6625, "learning_rate": 7.756427154856005e-06, "epoch": 1.1529904936617708, "total_flos": 1100560408747991040, "step": 286600 }, { "loss": 3.60375, "learning_rate": 7.755616029460075e-06, "epoch": 1.1533927932059653, "total_flos": 1100944671812812800, "step": 286700 }, { "loss": 3.67, "learning_rate": 7.754804904064145e-06, "epoch": 1.15379509275016, "total_flos": 1101337061078261760, "step": 286800 }, { "loss": 3.6725, "learning_rate": 7.753993778668215e-06, "epoch": 1.1541973922943545, "total_flos": 1101724962344017920, "step": 286900 }, { "loss": 3.63625, "learning_rate": 7.753182653272283e-06, "epoch": 1.1545996918385493, "total_flos": 1102114828769402880, "step": 287000 }, { "loss": 3.65125, "learning_rate": 7.752371527876352e-06, "epoch": 1.1550019913827438, "total_flos": 1102491443645399040, "step": 287100 }, { "loss": 3.62125, "learning_rate": 7.751560402480423e-06, "epoch": 1.1554042909269384, "total_flos": 1102867543330897920, "step": 287200 }, { "loss": 3.665, "learning_rate": 7.750749277084492e-06, "epoch": 1.155806590471133, "total_flos": 1103266927502376960, "step": 287300 }, { "loss": 3.64125, "learning_rate": 7.74993815168856e-06, "epoch": 1.1562088900153276, "total_flos": 1103662354798387200, "step": 287400 }, { "loss": 3.65375, "learning_rate": 7.74912702629263e-06, "epoch": 1.1566111895595221, "total_flos": 1104034083331522560, "step": 287500 }, { "loss": 3.6275, "learning_rate": 7.7483159008967e-06, "epoch": 1.157013489103717, "total_flos": 1104410708830003200, "step": 287600 }, { "loss": 3.6475, "learning_rate": 7.74750477550077e-06, "epoch": 1.1574157886479115, "total_flos": 1104790165220597760, "step": 287700 }, { "loss": 3.66625, "learning_rate": 7.746693650104838e-06, "epoch": 1.157818088192106, "total_flos": 1105179362429460480, "step": 287800 }, { "loss": 3.6625, "learning_rate": 7.745882524708907e-06, "epoch": 1.1582203877363006, "total_flos": 1105575251803545600, "step": 287900 }, { "loss": 3.64625, "learning_rate": 7.745071399312979e-06, "epoch": 1.1586226872804952, "total_flos": 1105963206181724160, "step": 288000 }, { "loss": 3.69375, "learning_rate": 7.744260273917047e-06, "epoch": 1.15902498682469, "total_flos": 1106343459258654720, "step": 288100 }, { "loss": 3.635, "learning_rate": 7.743449148521115e-06, "epoch": 1.1594272863688846, "total_flos": 1106726776922357760, "step": 288200 }, { "loss": 3.68125, "learning_rate": 7.742638023125185e-06, "epoch": 1.1598295859130792, "total_flos": 1107109329767178240, "step": 288300 }, { "loss": 3.6625, "learning_rate": 7.741826897729255e-06, "epoch": 1.1602318854572737, "total_flos": 1107489088898580480, "step": 288400 }, { "loss": 3.66875, "learning_rate": 7.741015772333325e-06, "epoch": 1.1606341850014683, "total_flos": 1107860243817553920, "step": 288500 }, { "loss": 3.62125, "learning_rate": 7.740204646937394e-06, "epoch": 1.161036484545663, "total_flos": 1108256032278036480, "step": 288600 }, { "loss": 3.65125, "learning_rate": 7.739393521541462e-06, "epoch": 1.1614387840898577, "total_flos": 1108645888080936960, "step": 288700 }, { "loss": 3.6375, "learning_rate": 7.738582396145534e-06, "epoch": 1.1618410836340523, "total_flos": 1109021982455193600, "step": 288800 }, { "loss": 3.65375, "learning_rate": 7.737771270749602e-06, "epoch": 1.1622433831782468, "total_flos": 1109399670202122240, "step": 288900 }, { "loss": 3.6375, "learning_rate": 7.73696014535367e-06, "epoch": 1.1626456827224414, "total_flos": 1109782031842222080, "step": 289000 }, { "loss": 3.68, "learning_rate": 7.73614901995774e-06, "epoch": 1.1630479822666362, "total_flos": 1110150961350696960, "step": 289100 }, { "loss": 3.61375, "learning_rate": 7.73533789456181e-06, "epoch": 1.1634502818108308, "total_flos": 1110545427311861760, "step": 289200 }, { "loss": 3.6875, "learning_rate": 7.73452676916588e-06, "epoch": 1.1638525813550253, "total_flos": 1110910904512880640, "step": 289300 }, { "loss": 3.65, "learning_rate": 7.733715643769949e-06, "epoch": 1.16425488089922, "total_flos": 1111285623275397120, "step": 289400 }, { "loss": 3.6525, "learning_rate": 7.732904518374017e-06, "epoch": 1.1646571804434145, "total_flos": 1111666242828042240, "step": 289500 }, { "loss": 3.63125, "learning_rate": 7.732093392978089e-06, "epoch": 1.165059479987609, "total_flos": 1112040685405962240, "step": 289600 }, { "loss": 3.64375, "learning_rate": 7.731282267582157e-06, "epoch": 1.1654617795318039, "total_flos": 1112437344910172160, "step": 289700 }, { "loss": 3.6025, "learning_rate": 7.730471142186227e-06, "epoch": 1.1658640790759984, "total_flos": 1112802742442557440, "step": 289800 }, { "loss": 3.6025, "learning_rate": 7.729660016790298e-06, "epoch": 1.166266378620193, "total_flos": 1113183813450792960, "step": 289900 }, { "loss": 3.6625, "learning_rate": 7.728848891394366e-06, "epoch": 1.1666686781643876, "total_flos": 1113560943517286400, "step": 290000 }, { "loss": 3.61625, "learning_rate": 7.728037765998436e-06, "epoch": 1.1670709777085824, "total_flos": 1113946422856581120, "step": 290100 }, { "loss": 3.60625, "learning_rate": 7.727226640602504e-06, "epoch": 1.167473277252777, "total_flos": 1114323807862702080, "step": 290200 }, { "loss": 3.66125, "learning_rate": 7.726415515206574e-06, "epoch": 1.1678755767969715, "total_flos": 1114711693194731520, "step": 290300 }, { "loss": 3.63375, "learning_rate": 7.725604389810644e-06, "epoch": 1.168277876341166, "total_flos": 1115095765054832640, "step": 290400 }, { "loss": 3.65125, "learning_rate": 7.724793264414713e-06, "epoch": 1.1686801758853607, "total_flos": 1115489816739102720, "step": 290500 }, { "loss": 3.64625, "learning_rate": 7.723982139018783e-06, "epoch": 1.1690824754295552, "total_flos": 1115866490038763520, "step": 290600 }, { "loss": 3.6175, "learning_rate": 7.723171013622853e-06, "epoch": 1.16948477497375, "total_flos": 1116242180758609920, "step": 290700 }, { "loss": 3.57375, "learning_rate": 7.722359888226921e-06, "epoch": 1.1698870745179446, "total_flos": 1116625971122872320, "step": 290800 }, { "loss": 3.67875, "learning_rate": 7.721548762830991e-06, "epoch": 1.1702893740621392, "total_flos": 1117004035968000000, "step": 290900 }, { "loss": 3.6075, "learning_rate": 7.72073763743506e-06, "epoch": 1.1706916736063337, "total_flos": 1117391491089408000, "step": 291000 }, { "loss": 3.68, "learning_rate": 7.71992651203913e-06, "epoch": 1.1710939731505285, "total_flos": 1117762741610741760, "step": 291100 }, { "loss": 3.62, "learning_rate": 7.7191153866432e-06, "epoch": 1.1714962726947231, "total_flos": 1118138480131768320, "step": 291200 }, { "loss": 3.6225, "learning_rate": 7.718304261247268e-06, "epoch": 1.1718985722389177, "total_flos": 1118524283456839680, "step": 291300 }, { "loss": 3.57625, "learning_rate": 7.717493135851338e-06, "epoch": 1.1723008717831123, "total_flos": 1118914744741355520, "step": 291400 }, { "loss": 3.65375, "learning_rate": 7.716682010455408e-06, "epoch": 1.1727031713273068, "total_flos": 1119303421448478720, "step": 291500 }, { "loss": 3.6575, "learning_rate": 7.715870885059476e-06, "epoch": 1.1731054708715014, "total_flos": 1119685039514664960, "step": 291600 }, { "loss": 3.60625, "learning_rate": 7.715059759663546e-06, "epoch": 1.1735077704156962, "total_flos": 1120067464889671680, "step": 291700 }, { "loss": 3.645, "learning_rate": 7.714248634267615e-06, "epoch": 1.1739100699598908, "total_flos": 1120462461975060480, "step": 291800 }, { "loss": 3.63, "learning_rate": 7.713437508871685e-06, "epoch": 1.1743123695040854, "total_flos": 1120844196888576000, "step": 291900 }, { "loss": 3.635, "learning_rate": 7.712626383475755e-06, "epoch": 1.17471466904828, "total_flos": 1121213083907112960, "step": 292000 }, { "loss": 3.58375, "learning_rate": 7.711815258079823e-06, "epoch": 1.1751169685924745, "total_flos": 1121587908894474240, "step": 292100 }, { "loss": 3.585, "learning_rate": 7.711004132683893e-06, "epoch": 1.1755192681366693, "total_flos": 1121968836499169280, "step": 292200 }, { "loss": 3.605, "learning_rate": 7.710193007287963e-06, "epoch": 1.1759215676808639, "total_flos": 1122347767076782080, "step": 292300 }, { "loss": 3.62125, "learning_rate": 7.709381881892032e-06, "epoch": 1.1763238672250584, "total_flos": 1122736263201669120, "step": 292400 }, { "loss": 3.57, "learning_rate": 7.708570756496102e-06, "epoch": 1.176726166769253, "total_flos": 1123121726607237120, "step": 292500 }, { "loss": 3.6175, "learning_rate": 7.70775963110017e-06, "epoch": 1.1771284663134476, "total_flos": 1123496434747269120, "step": 292600 }, { "loss": 3.60125, "learning_rate": 7.70694850570424e-06, "epoch": 1.1775307658576422, "total_flos": 1123882296496005120, "step": 292700 }, { "loss": 3.6125, "learning_rate": 7.70613738030831e-06, "epoch": 1.177933065401837, "total_flos": 1124272051385303040, "step": 292800 }, { "loss": 3.62375, "learning_rate": 7.705326254912378e-06, "epoch": 1.1783353649460315, "total_flos": 1124647843018752000, "step": 292900 }, { "loss": 3.61125, "learning_rate": 7.704515129516449e-06, "epoch": 1.178737664490226, "total_flos": 1125040641249853440, "step": 293000 }, { "loss": 3.595, "learning_rate": 7.703704004120519e-06, "epoch": 1.1791399640344207, "total_flos": 1125435957009776640, "step": 293100 }, { "loss": 3.5725, "learning_rate": 7.702892878724587e-06, "epoch": 1.1795422635786155, "total_flos": 1125826290824478720, "step": 293200 }, { "loss": 3.63625, "learning_rate": 7.702081753328657e-06, "epoch": 1.17994456312281, "total_flos": 1126199628664012800, "step": 293300 }, { "loss": 3.58875, "learning_rate": 7.701270627932725e-06, "epoch": 1.1803468626670046, "total_flos": 1126574671412305920, "step": 293400 }, { "loss": 3.64875, "learning_rate": 7.700459502536795e-06, "epoch": 1.1807491622111992, "total_flos": 1126961276734955520, "step": 293500 }, { "loss": 3.62, "learning_rate": 7.699648377140865e-06, "epoch": 1.1811514617553938, "total_flos": 1127342395544371200, "step": 293600 }, { "loss": 3.5775, "learning_rate": 7.698837251744934e-06, "epoch": 1.1815537612995883, "total_flos": 1127736654367088640, "step": 293700 }, { "loss": 3.605, "learning_rate": 7.698026126349004e-06, "epoch": 1.1819560608437831, "total_flos": 1128106067198607360, "step": 293800 }, { "loss": 3.64, "learning_rate": 7.697215000953074e-06, "epoch": 1.1823583603879777, "total_flos": 1128485698860195840, "step": 293900 }, { "loss": 3.6, "learning_rate": 7.696403875557142e-06, "epoch": 1.1827606599321723, "total_flos": 1128877344551731200, "step": 294000 }, { "loss": 3.6075, "learning_rate": 7.695592750161212e-06, "epoch": 1.1831629594763668, "total_flos": 1129266403668295680, "step": 294100 }, { "loss": 3.5925, "learning_rate": 7.69478162476528e-06, "epoch": 1.1835652590205616, "total_flos": 1129641361436712960, "step": 294200 }, { "loss": 3.645, "learning_rate": 7.69397049936935e-06, "epoch": 1.1839675585647562, "total_flos": 1130028396969984000, "step": 294300 }, { "loss": 3.61875, "learning_rate": 7.69315937397342e-06, "epoch": 1.1843698581089508, "total_flos": 1130408416352256000, "step": 294400 }, { "loss": 3.6475, "learning_rate": 7.692348248577489e-06, "epoch": 1.1847721576531454, "total_flos": 1130790284046827520, "step": 294500 }, { "loss": 3.6625, "learning_rate": 7.691537123181559e-06, "epoch": 1.18517445719734, "total_flos": 1131172735978045440, "step": 294600 }, { "loss": 3.67125, "learning_rate": 7.690725997785629e-06, "epoch": 1.1855767567415345, "total_flos": 1131561922564423680, "step": 294700 }, { "loss": 3.62375, "learning_rate": 7.689914872389697e-06, "epoch": 1.1859790562857293, "total_flos": 1131948522575831040, "step": 294800 }, { "loss": 3.6425, "learning_rate": 7.689103746993767e-06, "epoch": 1.1863813558299239, "total_flos": 1132327777139220480, "step": 294900 }, { "loss": 3.625, "learning_rate": 7.688292621597838e-06, "epoch": 1.1867836553741185, "total_flos": 1132701603613040640, "step": 295000 }, { "loss": 3.65125, "learning_rate": 7.687481496201906e-06, "epoch": 1.187185954918313, "total_flos": 1133083540353761280, "step": 295100 }, { "loss": 3.655, "learning_rate": 7.686670370805976e-06, "epoch": 1.1875882544625076, "total_flos": 1133467452876595200, "step": 295200 }, { "loss": 3.585, "learning_rate": 7.685859245410044e-06, "epoch": 1.1879905540067024, "total_flos": 1133848704467066880, "step": 295300 }, { "loss": 3.625, "learning_rate": 7.685048120014114e-06, "epoch": 1.188392853550897, "total_flos": 1134247775275253760, "step": 295400 }, { "loss": 3.62, "learning_rate": 7.684236994618184e-06, "epoch": 1.1887951530950915, "total_flos": 1134624172390318080, "step": 295500 }, { "loss": 3.65875, "learning_rate": 7.683425869222253e-06, "epoch": 1.1891974526392861, "total_flos": 1135008812553338880, "step": 295600 }, { "loss": 3.59125, "learning_rate": 7.682614743826323e-06, "epoch": 1.1895997521834807, "total_flos": 1135410666452459520, "step": 295700 }, { "loss": 3.58125, "learning_rate": 7.681803618430393e-06, "epoch": 1.1900020517276755, "total_flos": 1135792831576596480, "step": 295800 }, { "loss": 3.63625, "learning_rate": 7.680992493034461e-06, "epoch": 1.19040435127187, "total_flos": 1136167109506007040, "step": 295900 }, { "loss": 3.60625, "learning_rate": 7.680181367638531e-06, "epoch": 1.1908066508160646, "total_flos": 1136547479430266880, "step": 296000 }, { "loss": 3.5675, "learning_rate": 7.6793702422426e-06, "epoch": 1.1912089503602592, "total_flos": 1136913965767311360, "step": 296100 }, { "loss": 3.58, "learning_rate": 7.67855911684667e-06, "epoch": 1.1916112499044538, "total_flos": 1137300220547973120, "step": 296200 }, { "loss": 3.56875, "learning_rate": 7.67774799145074e-06, "epoch": 1.1920135494486486, "total_flos": 1137693071891496960, "step": 296300 }, { "loss": 3.6375, "learning_rate": 7.676936866054808e-06, "epoch": 1.1924158489928431, "total_flos": 1138065421839974400, "step": 296400 }, { "loss": 3.66125, "learning_rate": 7.676125740658878e-06, "epoch": 1.1928181485370377, "total_flos": 1138444044365537280, "step": 296500 }, { "loss": 3.5825, "learning_rate": 7.675314615262948e-06, "epoch": 1.1932204480812323, "total_flos": 1138845494610247680, "step": 296600 }, { "loss": 3.66375, "learning_rate": 7.674503489867016e-06, "epoch": 1.1936227476254269, "total_flos": 1139236954408304640, "step": 296700 }, { "loss": 3.6225, "learning_rate": 7.673692364471086e-06, "epoch": 1.1940250471696214, "total_flos": 1139617595205918720, "step": 296800 }, { "loss": 3.58625, "learning_rate": 7.672881239075155e-06, "epoch": 1.1944273467138162, "total_flos": 1140001815780802560, "step": 296900 }, { "loss": 3.5825, "learning_rate": 7.672070113679225e-06, "epoch": 1.1948296462580108, "total_flos": 1140384172109660160, "step": 297000 }, { "loss": 3.60625, "learning_rate": 7.671258988283295e-06, "epoch": 1.1952319458022054, "total_flos": 1140774792731443200, "step": 297100 }, { "loss": 3.58375, "learning_rate": 7.670447862887363e-06, "epoch": 1.1956342453464, "total_flos": 1141174277816524800, "step": 297200 }, { "loss": 3.6775, "learning_rate": 7.669636737491433e-06, "epoch": 1.1960365448905947, "total_flos": 1141562593359175680, "step": 297300 }, { "loss": 3.5675, "learning_rate": 7.668825612095503e-06, "epoch": 1.1964388444347893, "total_flos": 1141936435766722560, "step": 297400 }, { "loss": 3.6275, "learning_rate": 7.668014486699572e-06, "epoch": 1.196841143978984, "total_flos": 1142300388641218560, "step": 297500 }, { "loss": 3.5975, "learning_rate": 7.667203361303642e-06, "epoch": 1.1972434435231785, "total_flos": 1142711234473451520, "step": 297600 }, { "loss": 3.60375, "learning_rate": 7.66639223590771e-06, "epoch": 1.197645743067373, "total_flos": 1143095216042434560, "step": 297700 }, { "loss": 3.58875, "learning_rate": 7.66558111051178e-06, "epoch": 1.1980480426115676, "total_flos": 1143471984944455680, "step": 297800 }, { "loss": 3.59, "learning_rate": 7.66476998511585e-06, "epoch": 1.1984503421557624, "total_flos": 1143838216341872640, "step": 297900 }, { "loss": 3.57875, "learning_rate": 7.663958859719918e-06, "epoch": 1.198852641699957, "total_flos": 1144220073413959680, "step": 298000 }, { "loss": 3.59875, "learning_rate": 7.663147734323989e-06, "epoch": 1.1992549412441516, "total_flos": 1144596714846167040, "step": 298100 }, { "loss": 3.615, "learning_rate": 7.662336608928059e-06, "epoch": 1.1996572407883461, "total_flos": 1144982061404405760, "step": 298200 }, { "loss": 3.58375, "learning_rate": 7.661525483532127e-06, "epoch": 1.200059540332541, "total_flos": 1145355824143319040, "step": 298300 }, { "loss": 3.6325, "learning_rate": 7.660714358136197e-06, "epoch": 1.2004618398767355, "total_flos": 1145742599425720320, "step": 298400 }, { "loss": 3.61375, "learning_rate": 7.659903232740265e-06, "epoch": 1.20086413942093, "total_flos": 1146130394466631680, "step": 298500 }, { "loss": 3.63375, "learning_rate": 7.659092107344335e-06, "epoch": 1.2012664389651246, "total_flos": 1146497454417838080, "step": 298600 }, { "loss": 3.57875, "learning_rate": 7.658280981948405e-06, "epoch": 1.2016687385093192, "total_flos": 1146889519697510400, "step": 298700 }, { "loss": 3.57125, "learning_rate": 7.657469856552474e-06, "epoch": 1.2020710380535138, "total_flos": 1147276942951464960, "step": 298800 }, { "loss": 3.59375, "learning_rate": 7.656658731156544e-06, "epoch": 1.2024733375977086, "total_flos": 1147671244264120320, "step": 298900 }, { "loss": 3.6075, "learning_rate": 7.655847605760614e-06, "epoch": 1.2028756371419032, "total_flos": 1148039111524147200, "step": 299000 }, { "loss": 3.60375, "learning_rate": 7.655036480364682e-06, "epoch": 1.2032779366860977, "total_flos": 1148416724913684480, "step": 299100 }, { "loss": 3.5725, "learning_rate": 7.654225354968752e-06, "epoch": 1.2036802362302923, "total_flos": 1148806809100001280, "step": 299200 }, { "loss": 3.5525, "learning_rate": 7.653414229572822e-06, "epoch": 1.2040825357744869, "total_flos": 1149188687417057280, "step": 299300 }, { "loss": 3.55, "learning_rate": 7.65260310417689e-06, "epoch": 1.2044848353186817, "total_flos": 1149568643064422400, "step": 299400 }, { "loss": 3.585, "learning_rate": 7.65179197878096e-06, "epoch": 1.2048871348628762, "total_flos": 1149959964770181120, "step": 299500 }, { "loss": 3.57, "learning_rate": 7.650980853385029e-06, "epoch": 1.2052894344070708, "total_flos": 1150329563495178240, "step": 299600 }, { "loss": 3.6325, "learning_rate": 7.650169727989099e-06, "epoch": 1.2056917339512654, "total_flos": 1150711861400371200, "step": 299700 }, { "loss": 3.63, "learning_rate": 7.649358602593169e-06, "epoch": 1.20609403349546, "total_flos": 1151095258732707840, "step": 299800 }, { "loss": 3.59375, "learning_rate": 7.648547477197237e-06, "epoch": 1.2064963330396548, "total_flos": 1151478815402311680, "step": 299900 }, { "loss": 3.645, "learning_rate": 7.647736351801307e-06, "epoch": 1.2068986325838493, "total_flos": 1151855244384829440, "step": 300000 }, { "loss": 3.58875, "learning_rate": 7.646925226405378e-06, "epoch": 1.207300932128044, "total_flos": 1152233346408652800, "step": 300100 }, { "loss": 3.595, "learning_rate": 7.646114101009446e-06, "epoch": 1.2077032316722385, "total_flos": 1152607799609057280, "step": 300200 }, { "loss": 3.60625, "learning_rate": 7.645302975613516e-06, "epoch": 1.208105531216433, "total_flos": 1152985875076669440, "step": 300300 }, { "loss": 3.58125, "learning_rate": 7.644491850217584e-06, "epoch": 1.2085078307606278, "total_flos": 1153373542647767040, "step": 300400 }, { "loss": 3.595, "learning_rate": 7.643680724821654e-06, "epoch": 1.2089101303048224, "total_flos": 1153753551407554560, "step": 300500 }, { "loss": 3.5675, "learning_rate": 7.642869599425724e-06, "epoch": 1.209312429849017, "total_flos": 1154130867367526400, "step": 300600 }, { "loss": 3.595, "learning_rate": 7.642058474029793e-06, "epoch": 1.2097147293932116, "total_flos": 1154503700639047680, "step": 300700 }, { "loss": 3.61, "learning_rate": 7.641247348633863e-06, "epoch": 1.2101170289374061, "total_flos": 1154863170825093120, "step": 300800 }, { "loss": 3.595, "learning_rate": 7.640436223237933e-06, "epoch": 1.2105193284816007, "total_flos": 1155245049142149120, "step": 300900 }, { "loss": 3.61, "learning_rate": 7.639625097842001e-06, "epoch": 1.2109216280257955, "total_flos": 1155620219360256000, "step": 301000 }, { "loss": 3.5425, "learning_rate": 7.638813972446071e-06, "epoch": 1.21132392756999, "total_flos": 1155997699968737280, "step": 301100 }, { "loss": 3.61125, "learning_rate": 7.63800284705014e-06, "epoch": 1.2117262271141847, "total_flos": 1156366337358888960, "step": 301200 }, { "loss": 3.57375, "learning_rate": 7.63719172165421e-06, "epoch": 1.2121285266583792, "total_flos": 1156745857484390400, "step": 301300 }, { "loss": 3.57125, "learning_rate": 7.63638059625828e-06, "epoch": 1.212530826202574, "total_flos": 1157103277530931200, "step": 301400 }, { "loss": 3.6175, "learning_rate": 7.635569470862348e-06, "epoch": 1.2129331257467686, "total_flos": 1157481979725127680, "step": 301500 }, { "loss": 3.55875, "learning_rate": 7.634758345466418e-06, "epoch": 1.2133354252909632, "total_flos": 1157879223465984000, "step": 301600 }, { "loss": 3.64, "learning_rate": 7.633947220070488e-06, "epoch": 1.2137377248351577, "total_flos": 1158257638853099520, "step": 301700 }, { "loss": 3.57375, "learning_rate": 7.633136094674556e-06, "epoch": 1.2141400243793523, "total_flos": 1158661229528432640, "step": 301800 }, { "loss": 3.58875, "learning_rate": 7.632324969278626e-06, "epoch": 1.2145423239235469, "total_flos": 1159040399111946240, "step": 301900 }, { "loss": 3.5725, "learning_rate": 7.631513843882695e-06, "epoch": 1.2149446234677417, "total_flos": 1159421422319001600, "step": 302000 }, { "loss": 3.66125, "learning_rate": 7.630702718486765e-06, "epoch": 1.2153469230119363, "total_flos": 1159810991314821120, "step": 302100 }, { "loss": 3.63875, "learning_rate": 7.629891593090835e-06, "epoch": 1.2157492225561308, "total_flos": 1160208065095925760, "step": 302200 }, { "loss": 3.54875, "learning_rate": 7.629080467694904e-06, "epoch": 1.2161515221003254, "total_flos": 1160593942778388480, "step": 302300 }, { "loss": 3.61, "learning_rate": 7.628269342298973e-06, "epoch": 1.2165538216445202, "total_flos": 1160966499865313280, "step": 302400 }, { "loss": 3.5725, "learning_rate": 7.627458216903043e-06, "epoch": 1.2169561211887148, "total_flos": 1161365931837972480, "step": 302500 }, { "loss": 3.59375, "learning_rate": 7.6266470915071125e-06, "epoch": 1.2173584207329093, "total_flos": 1161741441975582720, "step": 302600 }, { "loss": 3.52875, "learning_rate": 7.625835966111182e-06, "epoch": 1.217760720277104, "total_flos": 1162114349604495360, "step": 302700 }, { "loss": 3.5775, "learning_rate": 7.625024840715251e-06, "epoch": 1.2181630198212985, "total_flos": 1162495962359439360, "step": 302800 }, { "loss": 3.63375, "learning_rate": 7.624213715319321e-06, "epoch": 1.218565319365493, "total_flos": 1162884118564823040, "step": 302900 }, { "loss": 3.5825, "learning_rate": 7.62340258992339e-06, "epoch": 1.2189676189096879, "total_flos": 1163266538628587520, "step": 303000 }, { "loss": 3.62875, "learning_rate": 7.622591464527459e-06, "epoch": 1.2193699184538824, "total_flos": 1163636886238740480, "step": 303100 }, { "loss": 3.56375, "learning_rate": 7.6217803391315285e-06, "epoch": 1.219772217998077, "total_flos": 1164028346036797440, "step": 303200 }, { "loss": 3.61, "learning_rate": 7.6209692137355985e-06, "epoch": 1.2201745175422716, "total_flos": 1164412826862551040, "step": 303300 }, { "loss": 3.60875, "learning_rate": 7.620158088339668e-06, "epoch": 1.2205768170864661, "total_flos": 1164786148768358400, "step": 303400 }, { "loss": 3.565, "learning_rate": 7.619346962943737e-06, "epoch": 1.220979116630661, "total_flos": 1165170061291192320, "step": 303500 }, { "loss": 3.56, "learning_rate": 7.618535837547806e-06, "epoch": 1.2213814161748555, "total_flos": 1165560989965025280, "step": 303600 }, { "loss": 3.58375, "learning_rate": 7.617724712151876e-06, "epoch": 1.22178371571905, "total_flos": 1165952540054200320, "step": 303700 }, { "loss": 3.5425, "learning_rate": 7.616913586755945e-06, "epoch": 1.2221860152632447, "total_flos": 1166337026191196160, "step": 303800 }, { "loss": 3.58875, "learning_rate": 7.6161024613600146e-06, "epoch": 1.2225883148074392, "total_flos": 1166720981203968000, "step": 303900 }, { "loss": 3.55375, "learning_rate": 7.615291335964085e-06, "epoch": 1.222990614351634, "total_flos": 1167097654503628800, "step": 304000 }, { "loss": 3.54375, "learning_rate": 7.614480210568154e-06, "epoch": 1.2233929138958286, "total_flos": 1167482916081991680, "step": 304100 }, { "loss": 3.6225, "learning_rate": 7.613669085172223e-06, "epoch": 1.2237952134400232, "total_flos": 1167870286223523840, "step": 304200 }, { "loss": 3.62, "learning_rate": 7.612857959776292e-06, "epoch": 1.2241975129842178, "total_flos": 1168255176014929920, "step": 304300 }, { "loss": 3.5925, "learning_rate": 7.612046834380362e-06, "epoch": 1.2245998125284123, "total_flos": 1168647671505223680, "step": 304400 }, { "loss": 3.54875, "learning_rate": 7.6112357089844314e-06, "epoch": 1.2250021120726071, "total_flos": 1169038600179056640, "step": 304500 }, { "loss": 3.6025, "learning_rate": 7.610424583588501e-06, "epoch": 1.2254044116168017, "total_flos": 1169433724734259200, "step": 304600 }, { "loss": 3.6275, "learning_rate": 7.60961345819257e-06, "epoch": 1.2258067111609963, "total_flos": 1169812039207772160, "step": 304700 }, { "loss": 3.5775, "learning_rate": 7.60880233279664e-06, "epoch": 1.2262090107051908, "total_flos": 1170196004843028480, "step": 304800 }, { "loss": 3.57625, "learning_rate": 7.607991207400709e-06, "epoch": 1.2266113102493854, "total_flos": 1170578403661824000, "step": 304900 }, { "loss": 3.5675, "learning_rate": 7.607180082004778e-06, "epoch": 1.22701360979358, "total_flos": 1170972673107025920, "step": 305000 }, { "loss": 3.57125, "learning_rate": 7.6063689566088475e-06, "epoch": 1.2274159093377748, "total_flos": 1171343440305315840, "step": 305100 }, { "loss": 3.61625, "learning_rate": 7.6055578312129175e-06, "epoch": 1.2278182088819694, "total_flos": 1171719948956467200, "step": 305200 }, { "loss": 3.60125, "learning_rate": 7.604746705816987e-06, "epoch": 1.228220508426164, "total_flos": 1172111610581729280, "step": 305300 }, { "loss": 3.565, "learning_rate": 7.603935580421056e-06, "epoch": 1.2286228079703585, "total_flos": 1172484815640207360, "step": 305400 }, { "loss": 3.54, "learning_rate": 7.603124455025125e-06, "epoch": 1.2290251075145533, "total_flos": 1172864048958627840, "step": 305500 }, { "loss": 3.59, "learning_rate": 7.602313329629195e-06, "epoch": 1.2294274070587479, "total_flos": 1173236048365117440, "step": 305600 }, { "loss": 3.5925, "learning_rate": 7.601502204233264e-06, "epoch": 1.2298297066029424, "total_flos": 1173616784765091840, "step": 305700 }, { "loss": 3.59125, "learning_rate": 7.6006910788373335e-06, "epoch": 1.230232006147137, "total_flos": 1174002322528051200, "step": 305800 }, { "loss": 3.60125, "learning_rate": 7.599879953441403e-06, "epoch": 1.2306343056913316, "total_flos": 1174381263728148480, "step": 305900 }, { "loss": 3.575, "learning_rate": 7.599068828045473e-06, "epoch": 1.2310366052355262, "total_flos": 1174760332398059520, "step": 306000 }, { "loss": 3.60625, "learning_rate": 7.598257702649542e-06, "epoch": 1.231438904779721, "total_flos": 1175142577190830080, "step": 306100 }, { "loss": 3.57625, "learning_rate": 7.597446577253611e-06, "epoch": 1.2318412043239155, "total_flos": 1175517768653905920, "step": 306200 }, { "loss": 3.61625, "learning_rate": 7.59663545185768e-06, "epoch": 1.23224350386811, "total_flos": 1175913477445754880, "step": 306300 }, { "loss": 3.54, "learning_rate": 7.59582432646175e-06, "epoch": 1.2326458034123047, "total_flos": 1176309775785492480, "step": 306400 }, { "loss": 3.55, "learning_rate": 7.59501320106582e-06, "epoch": 1.2330481029564995, "total_flos": 1176676856981667840, "step": 306500 }, { "loss": 3.5975, "learning_rate": 7.594202075669889e-06, "epoch": 1.233450402500694, "total_flos": 1177065778005934080, "step": 306600 }, { "loss": 3.58375, "learning_rate": 7.593390950273958e-06, "epoch": 1.2338527020448886, "total_flos": 1177466256293314560, "step": 306700 }, { "loss": 3.59375, "learning_rate": 7.592579824878028e-06, "epoch": 1.2342550015890832, "total_flos": 1177859351953981440, "step": 306800 }, { "loss": 3.60125, "learning_rate": 7.591768699482097e-06, "epoch": 1.2346573011332778, "total_flos": 1178247301020917760, "step": 306900 }, { "loss": 3.63, "learning_rate": 7.590957574086166e-06, "epoch": 1.2350596006774723, "total_flos": 1178624686027038720, "step": 307000 }, { "loss": 3.5575, "learning_rate": 7.590146448690236e-06, "epoch": 1.2354619002216671, "total_flos": 1179006537787883520, "step": 307100 }, { "loss": 3.565, "learning_rate": 7.589335323294306e-06, "epoch": 1.2358641997658617, "total_flos": 1179389616445685760, "step": 307200 }, { "loss": 3.565, "learning_rate": 7.588524197898375e-06, "epoch": 1.2362664993100563, "total_flos": 1179785059675422720, "step": 307300 }, { "loss": 3.5175, "learning_rate": 7.587713072502444e-06, "epoch": 1.2366687988542509, "total_flos": 1180166661807882240, "step": 307400 }, { "loss": 3.6075, "learning_rate": 7.586901947106513e-06, "epoch": 1.2370710983984454, "total_flos": 1180545241843507200, "step": 307500 }, { "loss": 3.55, "learning_rate": 7.586090821710583e-06, "epoch": 1.2374733979426402, "total_flos": 1180918908980060160, "step": 307600 }, { "loss": 3.57125, "learning_rate": 7.5852796963146525e-06, "epoch": 1.2378756974868348, "total_flos": 1181311277000540160, "step": 307700 }, { "loss": 3.56875, "learning_rate": 7.584468570918722e-06, "epoch": 1.2382779970310294, "total_flos": 1181721968806748160, "step": 307800 }, { "loss": 3.58125, "learning_rate": 7.583657445522791e-06, "epoch": 1.238680296575224, "total_flos": 1182126026871398400, "step": 307900 }, { "loss": 3.5475, "learning_rate": 7.582846320126861e-06, "epoch": 1.2390825961194185, "total_flos": 1182496257634222080, "step": 308000 }, { "loss": 3.6275, "learning_rate": 7.58203519473093e-06, "epoch": 1.2394848956636133, "total_flos": 1182874981073387520, "step": 308100 }, { "loss": 3.5675, "learning_rate": 7.581224069334999e-06, "epoch": 1.2398871952078079, "total_flos": 1183256561960878080, "step": 308200 }, { "loss": 3.51625, "learning_rate": 7.5804129439390685e-06, "epoch": 1.2402894947520025, "total_flos": 1183636023662714880, "step": 308300 }, { "loss": 3.50125, "learning_rate": 7.5796018185431385e-06, "epoch": 1.240691794296197, "total_flos": 1184039614338048000, "step": 308400 }, { "loss": 3.58875, "learning_rate": 7.578790693147208e-06, "epoch": 1.2410940938403916, "total_flos": 1184418236863610880, "step": 308500 }, { "loss": 3.5075, "learning_rate": 7.577979567751277e-06, "epoch": 1.2414963933845864, "total_flos": 1184801729798307840, "step": 308600 }, { "loss": 3.60875, "learning_rate": 7.577168442355347e-06, "epoch": 1.241898692928781, "total_flos": 1185182073166356480, "step": 308700 }, { "loss": 3.545, "learning_rate": 7.576357316959416e-06, "epoch": 1.2423009924729755, "total_flos": 1185556669770301440, "step": 308800 }, { "loss": 3.57625, "learning_rate": 7.575546191563485e-06, "epoch": 1.2427032920171701, "total_flos": 1185938107254251520, "step": 308900 }, { "loss": 3.5875, "learning_rate": 7.5747350661675546e-06, "epoch": 1.2431055915613647, "total_flos": 1186305857666949120, "step": 309000 }, { "loss": 3.48, "learning_rate": 7.573923940771625e-06, "epoch": 1.2435078911055593, "total_flos": 1186683688817418240, "step": 309100 }, { "loss": 3.56875, "learning_rate": 7.573112815375694e-06, "epoch": 1.243910190649754, "total_flos": 1187060632990433280, "step": 309200 }, { "loss": 3.5075, "learning_rate": 7.572301689979763e-06, "epoch": 1.2443124901939486, "total_flos": 1187429296936796160, "step": 309300 }, { "loss": 3.5325, "learning_rate": 7.571490564583832e-06, "epoch": 1.2447147897381432, "total_flos": 1187811669199380480, "step": 309400 }, { "loss": 3.53, "learning_rate": 7.570679439187902e-06, "epoch": 1.2451170892823378, "total_flos": 1188195188690288640, "step": 309500 }, { "loss": 3.53625, "learning_rate": 7.5698683137919714e-06, "epoch": 1.2455193888265326, "total_flos": 1188587184923811840, "step": 309600 }, { "loss": 3.62375, "learning_rate": 7.569057188396041e-06, "epoch": 1.2459216883707271, "total_flos": 1188970688480993280, "step": 309700 }, { "loss": 3.585, "learning_rate": 7.56824606300011e-06, "epoch": 1.2463239879149217, "total_flos": 1189340111934996480, "step": 309800 }, { "loss": 3.56125, "learning_rate": 7.56743493760418e-06, "epoch": 1.2467262874591163, "total_flos": 1189734423870136320, "step": 309900 }, { "loss": 3.5875, "learning_rate": 7.566623812208249e-06, "epoch": 1.2471285870033109, "total_flos": 1190109610021969920, "step": 310000 }, { "loss": 3.56, "learning_rate": 7.565812686812318e-06, "epoch": 1.2475308865475054, "total_flos": 1190493974000394240, "step": 310100 }, { "loss": 3.54875, "learning_rate": 7.5650015614163875e-06, "epoch": 1.2479331860917002, "total_flos": 1190885014210314240, "step": 310200 }, { "loss": 3.5875, "learning_rate": 7.5641904360204575e-06, "epoch": 1.2483354856358948, "total_flos": 1191278354188124160, "step": 310300 }, { "loss": 3.5875, "learning_rate": 7.563379310624527e-06, "epoch": 1.2487377851800894, "total_flos": 1191675348300595200, "step": 310400 }, { "loss": 3.59625, "learning_rate": 7.562568185228596e-06, "epoch": 1.249140084724284, "total_flos": 1192052106580131840, "step": 310500 }, { "loss": 3.5475, "learning_rate": 7.561757059832665e-06, "epoch": 1.2495423842684787, "total_flos": 1192444203727257600, "step": 310600 }, { "loss": 3.52125, "learning_rate": 7.560945934436735e-06, "epoch": 1.2499446838126733, "total_flos": 1192833995795251200, "step": 310700 }, { "loss": 3.5675, "learning_rate": 7.560134809040804e-06, "epoch": 1.250346983356868, "total_flos": 1193217212545351680, "step": 310800 }, { "loss": 3.54875, "learning_rate": 7.5593236836448735e-06, "epoch": 1.2507492829010625, "total_flos": 1193601836774645760, "step": 310900 }, { "loss": 3.555, "learning_rate": 7.558512558248943e-06, "epoch": 1.251151582445257, "total_flos": 1193980400876544000, "step": 311000 }, { "loss": 3.615, "learning_rate": 7.557701432853013e-06, "epoch": 1.2515538819894516, "total_flos": 1194357191023534080, "step": 311100 }, { "loss": 3.58875, "learning_rate": 7.556890307457082e-06, "epoch": 1.2519561815336462, "total_flos": 1194757249722777600, "step": 311200 }, { "loss": 3.5975, "learning_rate": 7.556079182061151e-06, "epoch": 1.252358481077841, "total_flos": 1195147780053442560, "step": 311300 }, { "loss": 3.555, "learning_rate": 7.55526805666522e-06, "epoch": 1.2527607806220356, "total_flos": 1195548794776289280, "step": 311400 }, { "loss": 3.55625, "learning_rate": 7.55445693126929e-06, "epoch": 1.2531630801662301, "total_flos": 1195938215057326080, "step": 311500 }, { "loss": 3.55, "learning_rate": 7.55364580587336e-06, "epoch": 1.253565379710425, "total_flos": 1196323556304322560, "step": 311600 }, { "loss": 3.5875, "learning_rate": 7.552834680477429e-06, "epoch": 1.2539676792546195, "total_flos": 1196713141233868800, "step": 311700 }, { "loss": 3.5425, "learning_rate": 7.552023555081498e-06, "epoch": 1.254369978798814, "total_flos": 1197094222864588800, "step": 311800 }, { "loss": 3.55375, "learning_rate": 7.551212429685568e-06, "epoch": 1.2547722783430086, "total_flos": 1197482857081774080, "step": 311900 }, { "loss": 3.5075, "learning_rate": 7.550401304289637e-06, "epoch": 1.2551745778872032, "total_flos": 1197873833556787200, "step": 312000 }, { "loss": 3.5975, "learning_rate": 7.549590178893706e-06, "epoch": 1.2555768774313978, "total_flos": 1198258096621608960, "step": 312100 }, { "loss": 3.5925, "learning_rate": 7.548779053497776e-06, "epoch": 1.2559791769755924, "total_flos": 1198641828562206720, "step": 312200 }, { "loss": 3.5175, "learning_rate": 7.547967928101846e-06, "epoch": 1.2563814765197872, "total_flos": 1199021269019074560, "step": 312300 }, { "loss": 3.5175, "learning_rate": 7.547156802705915e-06, "epoch": 1.2567837760639817, "total_flos": 1199410694611353600, "step": 312400 }, { "loss": 3.53125, "learning_rate": 7.546345677309984e-06, "epoch": 1.2571860756081763, "total_flos": 1199792381723688960, "step": 312500 }, { "loss": 3.55375, "learning_rate": 7.545534551914053e-06, "epoch": 1.2575883751523709, "total_flos": 1200170074781859840, "step": 312600 }, { "loss": 3.56, "learning_rate": 7.544723426518123e-06, "epoch": 1.2579906746965657, "total_flos": 1200559303858176000, "step": 312700 }, { "loss": 3.58125, "learning_rate": 7.5439123011221925e-06, "epoch": 1.2583929742407602, "total_flos": 1200939031122124800, "step": 312800 }, { "loss": 3.53375, "learning_rate": 7.543101175726262e-06, "epoch": 1.2587952737849548, "total_flos": 1201324945983283200, "step": 312900 }, { "loss": 3.5625, "learning_rate": 7.542290050330331e-06, "epoch": 1.2591975733291494, "total_flos": 1201704083699343360, "step": 313000 }, { "loss": 3.5375, "learning_rate": 7.541478924934401e-06, "epoch": 1.259599872873344, "total_flos": 1202082849628446720, "step": 313100 }, { "loss": 3.5325, "learning_rate": 7.54066779953847e-06, "epoch": 1.2600021724175385, "total_flos": 1202469996697804800, "step": 313200 }, { "loss": 3.52625, "learning_rate": 7.539856674142539e-06, "epoch": 1.2604044719617333, "total_flos": 1202843435450941440, "step": 313300 }, { "loss": 3.56375, "learning_rate": 7.5390455487466085e-06, "epoch": 1.260806771505928, "total_flos": 1203209996145377280, "step": 313400 }, { "loss": 3.56625, "learning_rate": 7.5382344233506785e-06, "epoch": 1.2612090710501225, "total_flos": 1203593908668211200, "step": 313500 }, { "loss": 3.5875, "learning_rate": 7.537423297954748e-06, "epoch": 1.261611370594317, "total_flos": 1203997350628761600, "step": 313600 }, { "loss": 3.54875, "learning_rate": 7.536612172558817e-06, "epoch": 1.2620136701385118, "total_flos": 1204377083203952640, "step": 313700 }, { "loss": 3.5525, "learning_rate": 7.535801047162887e-06, "epoch": 1.2624159696827064, "total_flos": 1204754664726036480, "step": 313800 }, { "loss": 3.5625, "learning_rate": 7.534989921766956e-06, "epoch": 1.262818269226901, "total_flos": 1205131964752281600, "step": 313900 }, { "loss": 3.64, "learning_rate": 7.534178796371025e-06, "epoch": 1.2632205687710956, "total_flos": 1205513396924989440, "step": 314000 }, { "loss": 3.5575, "learning_rate": 7.5333676709750946e-06, "epoch": 1.2636228683152901, "total_flos": 1205894239549808640, "step": 314100 }, { "loss": 3.54875, "learning_rate": 7.532556545579165e-06, "epoch": 1.2640251678594847, "total_flos": 1206266329247416320, "step": 314200 }, { "loss": 3.5375, "learning_rate": 7.531745420183234e-06, "epoch": 1.2644274674036795, "total_flos": 1206651797964226560, "step": 314300 }, { "loss": 3.55375, "learning_rate": 7.530934294787303e-06, "epoch": 1.264829766947874, "total_flos": 1207039125615820800, "step": 314400 }, { "loss": 3.5475, "learning_rate": 7.530123169391372e-06, "epoch": 1.2652320664920687, "total_flos": 1207430585413877760, "step": 314500 }, { "loss": 3.5725, "learning_rate": 7.529312043995442e-06, "epoch": 1.2656343660362632, "total_flos": 1207807619878010880, "step": 314600 }, { "loss": 3.5475, "learning_rate": 7.5285009185995114e-06, "epoch": 1.266036665580458, "total_flos": 1208212421516574720, "step": 314700 }, { "loss": 3.545, "learning_rate": 7.527689793203581e-06, "epoch": 1.2664389651246526, "total_flos": 1208592446210088960, "step": 314800 }, { "loss": 3.54, "learning_rate": 7.52687866780765e-06, "epoch": 1.2668412646688472, "total_flos": 1208986949349949440, "step": 314900 }, { "loss": 3.5325, "learning_rate": 7.52606754241172e-06, "epoch": 1.2672435642130417, "total_flos": 1209373374090362880, "step": 315000 }, { "loss": 3.52125, "learning_rate": 7.525256417015789e-06, "epoch": 1.2676458637572363, "total_flos": 1209769109438423040, "step": 315100 }, { "loss": 3.57375, "learning_rate": 7.524445291619858e-06, "epoch": 1.2680481633014309, "total_flos": 1210156745142067200, "step": 315200 }, { "loss": 3.56625, "learning_rate": 7.5236341662239275e-06, "epoch": 1.2684504628456255, "total_flos": 1210532680179056640, "step": 315300 }, { "loss": 3.57625, "learning_rate": 7.5228230408279975e-06, "epoch": 1.2688527623898203, "total_flos": 1210920273392762880, "step": 315400 }, { "loss": 3.56125, "learning_rate": 7.522011915432067e-06, "epoch": 1.2692550619340148, "total_flos": 1211290026143784960, "step": 315500 }, { "loss": 3.56, "learning_rate": 7.521200790036136e-06, "epoch": 1.2696573614782094, "total_flos": 1211674676929290240, "step": 315600 }, { "loss": 3.5575, "learning_rate": 7.520389664640205e-06, "epoch": 1.2700596610224042, "total_flos": 1212061457522933760, "step": 315700 }, { "loss": 3.4925, "learning_rate": 7.519578539244275e-06, "epoch": 1.2704619605665988, "total_flos": 1212446745657507840, "step": 315800 }, { "loss": 3.54875, "learning_rate": 7.518767413848344e-06, "epoch": 1.2708642601107933, "total_flos": 1212834025507921920, "step": 315900 }, { "loss": 3.52, "learning_rate": 7.5179562884524135e-06, "epoch": 1.271266559654988, "total_flos": 1213211893837086720, "step": 316000 }, { "loss": 3.5675, "learning_rate": 7.517145163056483e-06, "epoch": 1.2716688591991825, "total_flos": 1213600065976197120, "step": 316100 }, { "loss": 3.53375, "learning_rate": 7.516334037660553e-06, "epoch": 1.272071158743377, "total_flos": 1213989146337730560, "step": 316200 }, { "loss": 3.60125, "learning_rate": 7.515522912264622e-06, "epoch": 1.2724734582875716, "total_flos": 1214386984937717760, "step": 316300 }, { "loss": 3.585, "learning_rate": 7.514711786868691e-06, "epoch": 1.2728757578317664, "total_flos": 1214772124357509120, "step": 316400 }, { "loss": 3.55375, "learning_rate": 7.51390066147276e-06, "epoch": 1.273278057375961, "total_flos": 1215163823161466880, "step": 316500 }, { "loss": 3.485, "learning_rate": 7.51308953607683e-06, "epoch": 1.2736803569201556, "total_flos": 1215542211992371200, "step": 316600 }, { "loss": 3.5475, "learning_rate": 7.5122784106809e-06, "epoch": 1.2740826564643501, "total_flos": 1215914827502960640, "step": 316700 }, { "loss": 3.5625, "learning_rate": 7.511467285284969e-06, "epoch": 1.274484956008545, "total_flos": 1216284404982988800, "step": 316800 }, { "loss": 3.57375, "learning_rate": 7.510656159889038e-06, "epoch": 1.2748872555527395, "total_flos": 1216677150101667840, "step": 316900 }, { "loss": 3.5075, "learning_rate": 7.509845034493108e-06, "epoch": 1.275289555096934, "total_flos": 1217079843177062400, "step": 317000 }, { "loss": 3.5875, "learning_rate": 7.509033909097177e-06, "epoch": 1.2756918546411287, "total_flos": 1217462390710640640, "step": 317100 }, { "loss": 3.6, "learning_rate": 7.508222783701246e-06, "epoch": 1.2760941541853232, "total_flos": 1217838479773655040, "step": 317200 }, { "loss": 3.54125, "learning_rate": 7.507411658305316e-06, "epoch": 1.2764964537295178, "total_flos": 1218220772367605760, "step": 317300 }, { "loss": 3.54, "learning_rate": 7.506600532909386e-06, "epoch": 1.2768987532737126, "total_flos": 1218601705283543040, "step": 317400 }, { "loss": 3.5675, "learning_rate": 7.505789407513455e-06, "epoch": 1.2773010528179072, "total_flos": 1218986860637061120, "step": 317500 }, { "loss": 3.52625, "learning_rate": 7.504978282117524e-06, "epoch": 1.2777033523621018, "total_flos": 1219380450243256320, "step": 317600 }, { "loss": 3.49625, "learning_rate": 7.504167156721593e-06, "epoch": 1.2781056519062963, "total_flos": 1219769519982305280, "step": 317700 }, { "loss": 3.5725, "learning_rate": 7.503356031325663e-06, "epoch": 1.2785079514504911, "total_flos": 1220150176713646080, "step": 317800 }, { "loss": 3.54625, "learning_rate": 7.5025449059297325e-06, "epoch": 1.2789102509946857, "total_flos": 1220525479712808960, "step": 317900 }, { "loss": 3.49625, "learning_rate": 7.501733780533802e-06, "epoch": 1.2793125505388803, "total_flos": 1220896974551285760, "step": 318000 }, { "loss": 3.5475, "learning_rate": 7.500922655137871e-06, "epoch": 1.2797148500830748, "total_flos": 1221282947836108800, "step": 318100 }, { "loss": 3.55, "learning_rate": 7.500111529741942e-06, "epoch": 1.2801171496272694, "total_flos": 1221661278243348480, "step": 318200 }, { "loss": 3.5025, "learning_rate": 7.49930040434601e-06, "epoch": 1.280519449171464, "total_flos": 1222029761607475200, "step": 318300 }, { "loss": 3.57625, "learning_rate": 7.498489278950079e-06, "epoch": 1.2809217487156588, "total_flos": 1222403503101419520, "step": 318400 }, { "loss": 3.615, "learning_rate": 7.49767815355415e-06, "epoch": 1.2813240482598534, "total_flos": 1222795589626060800, "step": 318500 }, { "loss": 3.50375, "learning_rate": 7.496867028158219e-06, "epoch": 1.281726347804048, "total_flos": 1223167976753233920, "step": 318600 }, { "loss": 3.56125, "learning_rate": 7.496055902762288e-06, "epoch": 1.2821286473482425, "total_flos": 1223549403614699520, "step": 318700 }, { "loss": 3.49875, "learning_rate": 7.495244777366357e-06, "epoch": 1.2825309468924373, "total_flos": 1223918046316093440, "step": 318800 }, { "loss": 3.58375, "learning_rate": 7.494433651970428e-06, "epoch": 1.2829332464366319, "total_flos": 1224308268594708480, "step": 318900 }, { "loss": 3.52125, "learning_rate": 7.493622526574497e-06, "epoch": 1.2833355459808264, "total_flos": 1224701353632890880, "step": 319000 }, { "loss": 3.515, "learning_rate": 7.492811401178565e-06, "epoch": 1.283737845525021, "total_flos": 1225090779225169920, "step": 319100 }, { "loss": 3.58375, "learning_rate": 7.4920002757826346e-06, "epoch": 1.2841401450692156, "total_flos": 1225473576387133440, "step": 319200 }, { "loss": 3.5075, "learning_rate": 7.4911891503867055e-06, "epoch": 1.2845424446134102, "total_flos": 1225874059985756160, "step": 319300 }, { "loss": 3.54, "learning_rate": 7.490378024990775e-06, "epoch": 1.2849447441576047, "total_flos": 1226270363636736000, "step": 319400 }, { "loss": 3.52875, "learning_rate": 7.489566899594843e-06, "epoch": 1.2853470437017995, "total_flos": 1226661765011128320, "step": 319500 }, { "loss": 3.53625, "learning_rate": 7.488755774198912e-06, "epoch": 1.285749343245994, "total_flos": 1227029472933888000, "step": 319600 }, { "loss": 3.50875, "learning_rate": 7.487944648802983e-06, "epoch": 1.2861516427901887, "total_flos": 1227426998170583040, "step": 319700 }, { "loss": 3.60875, "learning_rate": 7.487133523407052e-06, "epoch": 1.2865539423343835, "total_flos": 1227814607318016000, "step": 319800 }, { "loss": 3.49125, "learning_rate": 7.486322398011121e-06, "epoch": 1.286956241878578, "total_flos": 1228206093672284160, "step": 319900 }, { "loss": 3.61625, "learning_rate": 7.48551127261519e-06, "epoch": 1.2873585414227726, "total_flos": 1228604070364569600, "step": 320000 }, { "loss": 3.5675, "learning_rate": 7.484700147219261e-06, "epoch": 1.2877608409669672, "total_flos": 1228990001159454720, "step": 320100 }, { "loss": 3.51, "learning_rate": 7.48388902182333e-06, "epoch": 1.2881631405111618, "total_flos": 1229367518946631680, "step": 320200 }, { "loss": 3.55, "learning_rate": 7.483077896427398e-06, "epoch": 1.2885654400553563, "total_flos": 1229743050329210880, "step": 320300 }, { "loss": 3.49875, "learning_rate": 7.4822667710314675e-06, "epoch": 1.288967739599551, "total_flos": 1230137930567270400, "step": 320400 }, { "loss": 3.53625, "learning_rate": 7.481455645635538e-06, "epoch": 1.2893700391437457, "total_flos": 1230520127558860800, "step": 320500 }, { "loss": 3.5725, "learning_rate": 7.4806445202396075e-06, "epoch": 1.2897723386879403, "total_flos": 1230917142916300800, "step": 320600 }, { "loss": 3.55375, "learning_rate": 7.479833394843676e-06, "epoch": 1.2901746382321349, "total_flos": 1231303456120627200, "step": 320700 }, { "loss": 3.5375, "learning_rate": 7.479022269447745e-06, "epoch": 1.2905769377763294, "total_flos": 1231697645897195520, "step": 320800 }, { "loss": 3.545, "learning_rate": 7.478211144051816e-06, "epoch": 1.2909792373205242, "total_flos": 1232082461331210240, "step": 320900 }, { "loss": 3.5375, "learning_rate": 7.477400018655885e-06, "epoch": 1.2913815368647188, "total_flos": 1232465609035161600, "step": 321000 }, { "loss": 3.50625, "learning_rate": 7.476588893259954e-06, "epoch": 1.2917838364089134, "total_flos": 1232857504355082240, "step": 321100 }, { "loss": 3.51375, "learning_rate": 7.475777767864023e-06, "epoch": 1.292186135953108, "total_flos": 1233237624650956800, "step": 321200 }, { "loss": 3.57875, "learning_rate": 7.474966642468094e-06, "epoch": 1.2925884354973025, "total_flos": 1233627910664478720, "step": 321300 }, { "loss": 3.55375, "learning_rate": 7.474155517072163e-06, "epoch": 1.292990735041497, "total_flos": 1234014680635637760, "step": 321400 }, { "loss": 3.54, "learning_rate": 7.473344391676232e-06, "epoch": 1.2933930345856919, "total_flos": 1234402852774748160, "step": 321500 }, { "loss": 3.52, "learning_rate": 7.4725332662803e-06, "epoch": 1.2937953341298865, "total_flos": 1234764585549987840, "step": 321600 }, { "loss": 3.50375, "learning_rate": 7.471722140884371e-06, "epoch": 1.294197633674081, "total_flos": 1235159970356060160, "step": 321700 }, { "loss": 3.51375, "learning_rate": 7.4709110154884404e-06, "epoch": 1.2945999332182756, "total_flos": 1235540945761935360, "step": 321800 }, { "loss": 3.48375, "learning_rate": 7.47009989009251e-06, "epoch": 1.2950022327624704, "total_flos": 1235917757153894400, "step": 321900 }, { "loss": 3.52625, "learning_rate": 7.469288764696578e-06, "epoch": 1.295404532306665, "total_flos": 1236286346742865920, "step": 322000 }, { "loss": 3.51875, "learning_rate": 7.468477639300649e-06, "epoch": 1.2958068318508595, "total_flos": 1236669266063400960, "step": 322100 }, { "loss": 3.57625, "learning_rate": 7.467666513904718e-06, "epoch": 1.2962091313950541, "total_flos": 1237059052820152320, "step": 322200 }, { "loss": 3.5275, "learning_rate": 7.466855388508787e-06, "epoch": 1.2966114309392487, "total_flos": 1237471104304373760, "step": 322300 }, { "loss": 3.5475, "learning_rate": 7.466044263112856e-06, "epoch": 1.2970137304834433, "total_flos": 1237862787174604800, "step": 322400 }, { "loss": 3.54125, "learning_rate": 7.4652331377169265e-06, "epoch": 1.297416030027638, "total_flos": 1238257173467136000, "step": 322500 }, { "loss": 3.49125, "learning_rate": 7.464422012320996e-06, "epoch": 1.2978183295718326, "total_flos": 1238633384688721920, "step": 322600 }, { "loss": 3.5075, "learning_rate": 7.463610886925065e-06, "epoch": 1.2982206291160272, "total_flos": 1239019724449259520, "step": 322700 }, { "loss": 3.51625, "learning_rate": 7.462799761529133e-06, "epoch": 1.2986229286602218, "total_flos": 1239412968824709120, "step": 322800 }, { "loss": 3.53375, "learning_rate": 7.461988636133204e-06, "epoch": 1.2990252282044166, "total_flos": 1239804014345871360, "step": 322900 }, { "loss": 3.53625, "learning_rate": 7.461177510737273e-06, "epoch": 1.2994275277486111, "total_flos": 1240195017377095680, "step": 323000 }, { "loss": 3.545, "learning_rate": 7.4603663853413425e-06, "epoch": 1.2998298272928057, "total_flos": 1240583911845150720, "step": 323100 }, { "loss": 3.5325, "learning_rate": 7.4595552599454126e-06, "epoch": 1.3002321268370003, "total_flos": 1240961748306862080, "step": 323200 }, { "loss": 3.51625, "learning_rate": 7.458744134549482e-06, "epoch": 1.3006344263811949, "total_flos": 1241326938700800000, "step": 323300 }, { "loss": 3.54125, "learning_rate": 7.457933009153551e-06, "epoch": 1.3010367259253894, "total_flos": 1241699129312010240, "step": 323400 }, { "loss": 3.5275, "learning_rate": 7.45712188375762e-06, "epoch": 1.301439025469584, "total_flos": 1242067841059553280, "step": 323500 }, { "loss": 3.53375, "learning_rate": 7.45631075836169e-06, "epoch": 1.3018413250137788, "total_flos": 1242460904852766720, "step": 323600 }, { "loss": 3.49625, "learning_rate": 7.455499632965759e-06, "epoch": 1.3022436245579734, "total_flos": 1242835103113543680, "step": 323700 }, { "loss": 3.5625, "learning_rate": 7.454688507569829e-06, "epoch": 1.302645924102168, "total_flos": 1243213773440286720, "step": 323800 }, { "loss": 3.4925, "learning_rate": 7.453877382173898e-06, "epoch": 1.3030482236463627, "total_flos": 1243596862720573440, "step": 323900 }, { "loss": 3.48875, "learning_rate": 7.453066256777968e-06, "epoch": 1.3034505231905573, "total_flos": 1243986325491548160, "step": 324000 }, { "loss": 3.47625, "learning_rate": 7.452255131382037e-06, "epoch": 1.303852822734752, "total_flos": 1244378698823270400, "step": 324100 }, { "loss": 3.54125, "learning_rate": 7.451444005986106e-06, "epoch": 1.3042551222789465, "total_flos": 1244759615805480960, "step": 324200 }, { "loss": 3.525, "learning_rate": 7.450632880590175e-06, "epoch": 1.304657421823141, "total_flos": 1245132135713710080, "step": 324300 }, { "loss": 3.44, "learning_rate": 7.4498217551942455e-06, "epoch": 1.3050597213673356, "total_flos": 1245512287877038080, "step": 324400 }, { "loss": 3.495, "learning_rate": 7.449010629798315e-06, "epoch": 1.3054620209115302, "total_flos": 1245910625733795840, "step": 324500 }, { "loss": 3.56375, "learning_rate": 7.448199504402384e-06, "epoch": 1.305864320455725, "total_flos": 1246304358743531520, "step": 324600 }, { "loss": 3.55125, "learning_rate": 7.447388379006453e-06, "epoch": 1.3062666199999196, "total_flos": 1246678339243376640, "step": 324700 }, { "loss": 3.535, "learning_rate": 7.446577253610523e-06, "epoch": 1.3066689195441141, "total_flos": 1247045553220608000, "step": 324800 }, { "loss": 3.54125, "learning_rate": 7.445766128214592e-06, "epoch": 1.3070712190883087, "total_flos": 1247437597255311360, "step": 324900 }, { "loss": 3.47375, "learning_rate": 7.4449550028186615e-06, "epoch": 1.3074735186325035, "total_flos": 1247802500842168320, "step": 325000 }, { "loss": 3.50625, "learning_rate": 7.444143877422731e-06, "epoch": 1.307875818176698, "total_flos": 1248212056042536960, "step": 325100 }, { "loss": 3.54, "learning_rate": 7.443332752026801e-06, "epoch": 1.3082781177208926, "total_flos": 1248610255806996480, "step": 325200 }, { "loss": 3.4775, "learning_rate": 7.44252162663087e-06, "epoch": 1.3086804172650872, "total_flos": 1248995421782999040, "step": 325300 }, { "loss": 3.5225, "learning_rate": 7.441710501234939e-06, "epoch": 1.3090827168092818, "total_flos": 1249386403569254400, "step": 325400 }, { "loss": 3.52, "learning_rate": 7.440899375839008e-06, "epoch": 1.3094850163534764, "total_flos": 1249773104494264320, "step": 325500 }, { "loss": 3.50125, "learning_rate": 7.440088250443078e-06, "epoch": 1.3098873158976712, "total_flos": 1250154658825543680, "step": 325600 }, { "loss": 3.51625, "learning_rate": 7.4392771250471475e-06, "epoch": 1.3102896154418657, "total_flos": 1250532601512099840, "step": 325700 }, { "loss": 3.53875, "learning_rate": 7.438465999651217e-06, "epoch": 1.3106919149860603, "total_flos": 1250912663384309760, "step": 325800 }, { "loss": 3.48875, "learning_rate": 7.437654874255286e-06, "epoch": 1.3110942145302549, "total_flos": 1251303453965844480, "step": 325900 }, { "loss": 3.55375, "learning_rate": 7.436843748859356e-06, "epoch": 1.3114965140744497, "total_flos": 1251683398990725120, "step": 326000 }, { "loss": 3.49875, "learning_rate": 7.436032623463425e-06, "epoch": 1.3118988136186442, "total_flos": 1252058388626595840, "step": 326100 }, { "loss": 3.50125, "learning_rate": 7.435221498067494e-06, "epoch": 1.3123011131628388, "total_flos": 1252431083805818880, "step": 326200 }, { "loss": 3.5325, "learning_rate": 7.4344103726715636e-06, "epoch": 1.3127034127070334, "total_flos": 1252834637302456320, "step": 326300 }, { "loss": 3.52375, "learning_rate": 7.433599247275634e-06, "epoch": 1.313105712251228, "total_flos": 1253218119614668800, "step": 326400 }, { "loss": 3.5275, "learning_rate": 7.432788121879703e-06, "epoch": 1.3135080117954225, "total_flos": 1253604544355082240, "step": 326500 }, { "loss": 3.565, "learning_rate": 7.431976996483772e-06, "epoch": 1.3139103113396173, "total_flos": 1253995568631275520, "step": 326600 }, { "loss": 3.5, "learning_rate": 7.431165871087841e-06, "epoch": 1.314312610883812, "total_flos": 1254390374511943680, "step": 326700 }, { "loss": 3.5525, "learning_rate": 7.430354745691911e-06, "epoch": 1.3147149104280065, "total_flos": 1254784404951244800, "step": 326800 }, { "loss": 3.50125, "learning_rate": 7.4295436202959804e-06, "epoch": 1.315117209972201, "total_flos": 1255169369100042240, "step": 326900 }, { "loss": 3.51875, "learning_rate": 7.42873249490005e-06, "epoch": 1.3155195095163958, "total_flos": 1255563601366548480, "step": 327000 }, { "loss": 3.5425, "learning_rate": 7.427921369504119e-06, "epoch": 1.3159218090605904, "total_flos": 1255943870377205760, "step": 327100 }, { "loss": 3.47875, "learning_rate": 7.427110244108189e-06, "epoch": 1.316324108604785, "total_flos": 1256328425560350720, "step": 327200 }, { "loss": 3.47625, "learning_rate": 7.426299118712258e-06, "epoch": 1.3167264081489796, "total_flos": 1256708790173368320, "step": 327300 }, { "loss": 3.5325, "learning_rate": 7.425487993316327e-06, "epoch": 1.3171287076931741, "total_flos": 1257098141408256000, "step": 327400 }, { "loss": 3.5225, "learning_rate": 7.4246768679203965e-06, "epoch": 1.3175310072373687, "total_flos": 1257483652615004160, "step": 327500 }, { "loss": 3.505, "learning_rate": 7.4238657425244665e-06, "epoch": 1.3179333067815633, "total_flos": 1257883201434992640, "step": 327600 }, { "loss": 3.51125, "learning_rate": 7.423054617128536e-06, "epoch": 1.318335606325758, "total_flos": 1258269610241679360, "step": 327700 }, { "loss": 3.5275, "learning_rate": 7.422243491732605e-06, "epoch": 1.3187379058699527, "total_flos": 1258643649165189120, "step": 327800 }, { "loss": 3.515, "learning_rate": 7.421432366336675e-06, "epoch": 1.3191402054141472, "total_flos": 1259020529603297280, "step": 327900 }, { "loss": 3.535, "learning_rate": 7.420621240940744e-06, "epoch": 1.3195425049583418, "total_flos": 1259403363943956480, "step": 328000 }, { "loss": 3.5125, "learning_rate": 7.419810115544813e-06, "epoch": 1.3199448045025366, "total_flos": 1259798844352389120, "step": 328100 }, { "loss": 3.48625, "learning_rate": 7.4189989901488825e-06, "epoch": 1.3203471040467312, "total_flos": 1260200294597099520, "step": 328200 }, { "loss": 3.48625, "learning_rate": 7.4181878647529526e-06, "epoch": 1.3207494035909257, "total_flos": 1260592019957268480, "step": 328300 }, { "loss": 3.4825, "learning_rate": 7.417376739357022e-06, "epoch": 1.3211517031351203, "total_flos": 1260982375016939520, "step": 328400 }, { "loss": 3.55, "learning_rate": 7.416565613961091e-06, "epoch": 1.3215540026793149, "total_flos": 1261374992665804800, "step": 328500 }, { "loss": 3.53875, "learning_rate": 7.41575448856516e-06, "epoch": 1.3219563022235095, "total_flos": 1261753578012672000, "step": 328600 }, { "loss": 3.42625, "learning_rate": 7.41494336316923e-06, "epoch": 1.3223586017677043, "total_flos": 1262130246001090560, "step": 328700 }, { "loss": 3.5075, "learning_rate": 7.414132237773299e-06, "epoch": 1.3227609013118988, "total_flos": 1262519809685667840, "step": 328800 }, { "loss": 3.515, "learning_rate": 7.413321112377369e-06, "epoch": 1.3231632008560934, "total_flos": 1262909283079127040, "step": 328900 }, { "loss": 3.41, "learning_rate": 7.412509986981438e-06, "epoch": 1.323565500400288, "total_flos": 1263292467961774080, "step": 329000 }, { "loss": 3.47375, "learning_rate": 7.411698861585508e-06, "epoch": 1.3239677999444828, "total_flos": 1263684033984675840, "step": 329100 }, { "loss": 3.5175, "learning_rate": 7.410887736189577e-06, "epoch": 1.3243700994886773, "total_flos": 1264075854947205120, "step": 329200 }, { "loss": 3.48875, "learning_rate": 7.410076610793646e-06, "epoch": 1.324772399032872, "total_flos": 1264459762158796800, "step": 329300 }, { "loss": 3.49875, "learning_rate": 7.409265485397715e-06, "epoch": 1.3251746985770665, "total_flos": 1264854132517601280, "step": 329400 }, { "loss": 3.5325, "learning_rate": 7.4084543600017855e-06, "epoch": 1.325576998121261, "total_flos": 1265228314844651520, "step": 329500 }, { "loss": 3.51, "learning_rate": 7.407643234605855e-06, "epoch": 1.3259792976654556, "total_flos": 1265608158955929600, "step": 329600 }, { "loss": 3.51, "learning_rate": 7.406832109209924e-06, "epoch": 1.3263815972096504, "total_flos": 1266004191733555200, "step": 329700 }, { "loss": 3.4975, "learning_rate": 7.406020983813993e-06, "epoch": 1.326783896753845, "total_flos": 1266392337316454400, "step": 329800 }, { "loss": 3.43875, "learning_rate": 7.405209858418063e-06, "epoch": 1.3271861962980396, "total_flos": 1266763332898160640, "step": 329900 }, { "loss": 3.5125, "learning_rate": 7.404398733022132e-06, "epoch": 1.3275884958422342, "total_flos": 1267140340806082560, "step": 330000 }, { "loss": 3.5025, "learning_rate": 7.4035876076262015e-06, "epoch": 1.327990795386429, "total_flos": 1267529463657553920, "step": 330100 }, { "loss": 3.47375, "learning_rate": 7.402776482230271e-06, "epoch": 1.3283930949306235, "total_flos": 1267919712492380160, "step": 330200 }, { "loss": 3.52, "learning_rate": 7.401965356834341e-06, "epoch": 1.328795394474818, "total_flos": 1268310258756771840, "step": 330300 }, { "loss": 3.47875, "learning_rate": 7.40115423143841e-06, "epoch": 1.3291976940190127, "total_flos": 1268677722362388480, "step": 330400 }, { "loss": 3.5175, "learning_rate": 7.400343106042479e-06, "epoch": 1.3295999935632072, "total_flos": 1269072098032435200, "step": 330500 }, { "loss": 3.52125, "learning_rate": 7.399531980646548e-06, "epoch": 1.3300022931074018, "total_flos": 1269455920264151040, "step": 330600 }, { "loss": 3.5075, "learning_rate": 7.398720855250618e-06, "epoch": 1.3304045926515966, "total_flos": 1269861183980789760, "step": 330700 }, { "loss": 3.45125, "learning_rate": 7.3979097298546875e-06, "epoch": 1.3308068921957912, "total_flos": 1270256956507545600, "step": 330800 }, { "loss": 3.53625, "learning_rate": 7.397098604458757e-06, "epoch": 1.3312091917399858, "total_flos": 1270645330473861120, "step": 330900 }, { "loss": 3.5225, "learning_rate": 7.396287479062826e-06, "epoch": 1.3316114912841803, "total_flos": 1271029179261788160, "step": 331000 }, { "loss": 3.5375, "learning_rate": 7.395476353666896e-06, "epoch": 1.3320137908283751, "total_flos": 1271418413649346560, "step": 331100 }, { "loss": 3.50875, "learning_rate": 7.394665228270965e-06, "epoch": 1.3324160903725697, "total_flos": 1271809214853365760, "step": 331200 }, { "loss": 3.52625, "learning_rate": 7.393854102875034e-06, "epoch": 1.3328183899167643, "total_flos": 1272192128862658560, "step": 331300 }, { "loss": 3.5475, "learning_rate": 7.3930429774791036e-06, "epoch": 1.3332206894609588, "total_flos": 1272586562956369920, "step": 331400 }, { "loss": 3.5175, "learning_rate": 7.392231852083174e-06, "epoch": 1.3336229890051534, "total_flos": 1272969232648519680, "step": 331500 }, { "loss": 3.4975, "learning_rate": 7.391420726687243e-06, "epoch": 1.334025288549348, "total_flos": 1273360453440675840, "step": 331600 }, { "loss": 3.4925, "learning_rate": 7.390609601291312e-06, "epoch": 1.3344275880935426, "total_flos": 1273753713749852160, "step": 331700 }, { "loss": 3.495, "learning_rate": 7.389798475895381e-06, "epoch": 1.3348298876377374, "total_flos": 1274136723361505280, "step": 331800 }, { "loss": 3.515, "learning_rate": 7.388987350499451e-06, "epoch": 1.335232187181932, "total_flos": 1274520619950612480, "step": 331900 }, { "loss": 3.50125, "learning_rate": 7.3881762251035204e-06, "epoch": 1.3356344867261265, "total_flos": 1274907979469660160, "step": 332000 }, { "loss": 3.52875, "learning_rate": 7.38736509970759e-06, "epoch": 1.336036786270321, "total_flos": 1275293979310694400, "step": 332100 }, { "loss": 3.505, "learning_rate": 7.386553974311659e-06, "epoch": 1.3364390858145159, "total_flos": 1275678582295019520, "step": 332200 }, { "loss": 3.505, "learning_rate": 7.385742848915729e-06, "epoch": 1.3368413853587104, "total_flos": 1276061347589529600, "step": 332300 }, { "loss": 3.4625, "learning_rate": 7.384931723519798e-06, "epoch": 1.337243684902905, "total_flos": 1276436544363847680, "step": 332400 }, { "loss": 3.4725, "learning_rate": 7.384120598123867e-06, "epoch": 1.3376459844470996, "total_flos": 1276820499376619520, "step": 332500 }, { "loss": 3.48375, "learning_rate": 7.383309472727937e-06, "epoch": 1.3380482839912942, "total_flos": 1277203216869949440, "step": 332600 }, { "loss": 3.51, "learning_rate": 7.3824983473320065e-06, "epoch": 1.3384505835354887, "total_flos": 1277589992152350720, "step": 332700 }, { "loss": 3.4775, "learning_rate": 7.381687221936076e-06, "epoch": 1.3388528830796835, "total_flos": 1277967802057850880, "step": 332800 }, { "loss": 3.53625, "learning_rate": 7.380876096540145e-06, "epoch": 1.339255182623878, "total_flos": 1278342563310305280, "step": 332900 }, { "loss": 3.445, "learning_rate": 7.380064971144215e-06, "epoch": 1.3396574821680727, "total_flos": 1278738038407495680, "step": 333000 }, { "loss": 3.46875, "learning_rate": 7.379253845748284e-06, "epoch": 1.3400597817122673, "total_flos": 1279117786916413440, "step": 333100 }, { "loss": 3.545, "learning_rate": 7.378442720352353e-06, "epoch": 1.340462081256462, "total_flos": 1279514765095157760, "step": 333200 }, { "loss": 3.49875, "learning_rate": 7.3776315949564225e-06, "epoch": 1.3408643808006566, "total_flos": 1279899989494824960, "step": 333300 }, { "loss": 3.51, "learning_rate": 7.3768204695604926e-06, "epoch": 1.3412666803448512, "total_flos": 1280272424423178240, "step": 333400 }, { "loss": 3.515, "learning_rate": 7.376009344164562e-06, "epoch": 1.3416689798890458, "total_flos": 1280657988742348800, "step": 333500 }, { "loss": 3.50625, "learning_rate": 7.375198218768631e-06, "epoch": 1.3420712794332403, "total_flos": 1281032372896604160, "step": 333600 }, { "loss": 3.49, "learning_rate": 7.3743870933727e-06, "epoch": 1.342473578977435, "total_flos": 1281405418617815040, "step": 333700 }, { "loss": 3.51375, "learning_rate": 7.37357596797677e-06, "epoch": 1.3428758785216297, "total_flos": 1281775208547532800, "step": 333800 }, { "loss": 3.51875, "learning_rate": 7.372764842580839e-06, "epoch": 1.3432781780658243, "total_flos": 1282162690225152000, "step": 333900 }, { "loss": 3.4925, "learning_rate": 7.371953717184909e-06, "epoch": 1.3436804776100189, "total_flos": 1282538949247918080, "step": 334000 }, { "loss": 3.51, "learning_rate": 7.371142591788978e-06, "epoch": 1.3440827771542134, "total_flos": 1282910821184593920, "step": 334100 }, { "loss": 3.47375, "learning_rate": 7.370331466393048e-06, "epoch": 1.3444850766984082, "total_flos": 1283294797442334720, "step": 334200 }, { "loss": 3.51875, "learning_rate": 7.369520340997117e-06, "epoch": 1.3448873762426028, "total_flos": 1283665426548326400, "step": 334300 }, { "loss": 3.47, "learning_rate": 7.368709215601186e-06, "epoch": 1.3452896757867974, "total_flos": 1284065649896079360, "step": 334400 }, { "loss": 3.4925, "learning_rate": 7.367898090205255e-06, "epoch": 1.345691975330992, "total_flos": 1284438541591265280, "step": 334500 }, { "loss": 3.45625, "learning_rate": 7.3670869648093255e-06, "epoch": 1.3460942748751865, "total_flos": 1284838483443179520, "step": 334600 }, { "loss": 3.48625, "learning_rate": 7.366275839413395e-06, "epoch": 1.346496574419381, "total_flos": 1285220213045452800, "step": 334700 }, { "loss": 3.49375, "learning_rate": 7.365464714017464e-06, "epoch": 1.3468988739635759, "total_flos": 1285601007869091840, "step": 334800 }, { "loss": 3.42125, "learning_rate": 7.364653588621533e-06, "epoch": 1.3473011735077705, "total_flos": 1285961646528430080, "step": 334900 }, { "loss": 3.51, "learning_rate": 7.363842463225603e-06, "epoch": 1.347703473051965, "total_flos": 1286333704358584320, "step": 335000 }, { "loss": 3.47625, "learning_rate": 7.363031337829672e-06, "epoch": 1.3481057725961596, "total_flos": 1286707727348367360, "step": 335100 }, { "loss": 3.50125, "learning_rate": 7.3622202124337415e-06, "epoch": 1.3485080721403544, "total_flos": 1287090960032194560, "step": 335200 }, { "loss": 3.51625, "learning_rate": 7.361409087037811e-06, "epoch": 1.348910371684549, "total_flos": 1287492734262681600, "step": 335300 }, { "loss": 3.505, "learning_rate": 7.360597961641881e-06, "epoch": 1.3493126712287435, "total_flos": 1287893244417515520, "step": 335400 }, { "loss": 3.52, "learning_rate": 7.35978683624595e-06, "epoch": 1.3497149707729381, "total_flos": 1288268722687672320, "step": 335500 }, { "loss": 3.52875, "learning_rate": 7.358975710850019e-06, "epoch": 1.3501172703171327, "total_flos": 1288656023783055360, "step": 335600 }, { "loss": 3.5225, "learning_rate": 7.358164585454088e-06, "epoch": 1.3505195698613273, "total_flos": 1289037285996011520, "step": 335700 }, { "loss": 3.5125, "learning_rate": 7.357353460058158e-06, "epoch": 1.3509218694055218, "total_flos": 1289428565211832320, "step": 335800 }, { "loss": 3.485, "learning_rate": 7.3565423346622275e-06, "epoch": 1.3513241689497166, "total_flos": 1289814841237463040, "step": 335900 }, { "loss": 3.4825, "learning_rate": 7.355731209266297e-06, "epoch": 1.3517264684939112, "total_flos": 1290207538554961920, "step": 336000 }, { "loss": 3.4925, "learning_rate": 7.354920083870366e-06, "epoch": 1.3521287680381058, "total_flos": 1290578157038469120, "step": 336100 }, { "loss": 3.5125, "learning_rate": 7.354108958474436e-06, "epoch": 1.3525310675823004, "total_flos": 1290965490001305600, "step": 336200 }, { "loss": 3.4675, "learning_rate": 7.353297833078505e-06, "epoch": 1.3529333671264951, "total_flos": 1291363434826137600, "step": 336300 }, { "loss": 3.46125, "learning_rate": 7.352486707682574e-06, "epoch": 1.3533356666706897, "total_flos": 1291746008915927040, "step": 336400 }, { "loss": 3.4725, "learning_rate": 7.3516755822866436e-06, "epoch": 1.3537379662148843, "total_flos": 1292120950750617600, "step": 336500 }, { "loss": 3.4725, "learning_rate": 7.350864456890714e-06, "epoch": 1.3541402657590789, "total_flos": 1292522448796508160, "step": 336600 }, { "loss": 3.49125, "learning_rate": 7.350053331494783e-06, "epoch": 1.3545425653032734, "total_flos": 1292892498977095680, "step": 336700 }, { "loss": 3.49875, "learning_rate": 7.349242206098852e-06, "epoch": 1.354944864847468, "total_flos": 1293282620342108160, "step": 336800 }, { "loss": 3.56, "learning_rate": 7.348431080702921e-06, "epoch": 1.3553471643916628, "total_flos": 1293659681362452480, "step": 336900 }, { "loss": 3.47125, "learning_rate": 7.347619955306991e-06, "epoch": 1.3557494639358574, "total_flos": 1294035133076398080, "step": 337000 }, { "loss": 3.50625, "learning_rate": 7.3468088299110604e-06, "epoch": 1.356151763480052, "total_flos": 1294412762399662080, "step": 337100 }, { "loss": 3.47, "learning_rate": 7.34599770451513e-06, "epoch": 1.3565540630242465, "total_flos": 1294788299093483520, "step": 337200 }, { "loss": 3.46875, "learning_rate": 7.3451865791192e-06, "epoch": 1.3569563625684413, "total_flos": 1295168217562152960, "step": 337300 }, { "loss": 3.47, "learning_rate": 7.344375453723269e-06, "epoch": 1.357358662112636, "total_flos": 1295550727917035520, "step": 337400 }, { "loss": 3.455, "learning_rate": 7.343564328327338e-06, "epoch": 1.3577609616568305, "total_flos": 1295909799759912960, "step": 337500 }, { "loss": 3.495, "learning_rate": 7.342753202931407e-06, "epoch": 1.358163261201025, "total_flos": 1296289091501998080, "step": 337600 }, { "loss": 3.51875, "learning_rate": 7.341942077535477e-06, "epoch": 1.3585655607452196, "total_flos": 1296667846808616960, "step": 337700 }, { "loss": 3.49125, "learning_rate": 7.3411309521395465e-06, "epoch": 1.3589678602894142, "total_flos": 1297052401991761920, "step": 337800 }, { "loss": 3.47875, "learning_rate": 7.340319826743616e-06, "epoch": 1.359370159833609, "total_flos": 1297423493175828480, "step": 337900 }, { "loss": 3.48625, "learning_rate": 7.339508701347685e-06, "epoch": 1.3597724593778036, "total_flos": 1297805573320089600, "step": 338000 }, { "loss": 3.4925, "learning_rate": 7.338697575951755e-06, "epoch": 1.3601747589219981, "total_flos": 1298184078998323200, "step": 338100 }, { "loss": 3.50625, "learning_rate": 7.337886450555824e-06, "epoch": 1.3605770584661927, "total_flos": 1298564263029104640, "step": 338200 }, { "loss": 3.4575, "learning_rate": 7.337075325159893e-06, "epoch": 1.3609793580103875, "total_flos": 1298933601503232000, "step": 338300 }, { "loss": 3.48125, "learning_rate": 7.3362641997639625e-06, "epoch": 1.361381657554582, "total_flos": 1299315193013207040, "step": 338400 }, { "loss": 3.48, "learning_rate": 7.335453074368033e-06, "epoch": 1.3617839570987766, "total_flos": 1299710030761328640, "step": 338500 }, { "loss": 3.4525, "learning_rate": 7.334641948972102e-06, "epoch": 1.3621862566429712, "total_flos": 1300091170815713280, "step": 338600 }, { "loss": 3.47625, "learning_rate": 7.333830823576171e-06, "epoch": 1.3625885561871658, "total_flos": 1300465597459906560, "step": 338700 }, { "loss": 3.5175, "learning_rate": 7.33301969818024e-06, "epoch": 1.3629908557313604, "total_flos": 1300839402688757760, "step": 338800 }, { "loss": 3.45, "learning_rate": 7.332208572784311e-06, "epoch": 1.363393155275555, "total_flos": 1301207976344002560, "step": 338900 }, { "loss": 3.43875, "learning_rate": 7.331397447388379e-06, "epoch": 1.3637954548197497, "total_flos": 1301580836171735040, "step": 339000 }, { "loss": 3.52875, "learning_rate": 7.330586321992449e-06, "epoch": 1.3641977543639443, "total_flos": 1301965046124134400, "step": 339100 }, { "loss": 3.48125, "learning_rate": 7.329775196596518e-06, "epoch": 1.3646000539081389, "total_flos": 1302335314065653760, "step": 339200 }, { "loss": 3.51625, "learning_rate": 7.328964071200589e-06, "epoch": 1.3650023534523337, "total_flos": 1302721005854638080, "step": 339300 }, { "loss": 3.52375, "learning_rate": 7.328152945804657e-06, "epoch": 1.3654046529965282, "total_flos": 1303097902226472960, "step": 339400 }, { "loss": 3.43125, "learning_rate": 7.327341820408726e-06, "epoch": 1.3658069525407228, "total_flos": 1303474798598307840, "step": 339500 }, { "loss": 3.46625, "learning_rate": 7.326530695012795e-06, "epoch": 1.3662092520849174, "total_flos": 1303865514822451200, "step": 339600 }, { "loss": 3.45625, "learning_rate": 7.325719569616866e-06, "epoch": 1.366611551629112, "total_flos": 1304243776183541760, "step": 339700 }, { "loss": 3.51125, "learning_rate": 7.324908444220935e-06, "epoch": 1.3670138511733065, "total_flos": 1304613635159408640, "step": 339800 }, { "loss": 3.51875, "learning_rate": 7.324097318825004e-06, "epoch": 1.367416150717501, "total_flos": 1304997754820689920, "step": 339900 }, { "loss": 3.5025, "learning_rate": 7.323286193429073e-06, "epoch": 1.367818450261696, "total_flos": 1305379580025323520, "step": 340000 }, { "loss": 3.43375, "learning_rate": 7.322475068033144e-06, "epoch": 1.3682207498058905, "total_flos": 1305763173873623040, "step": 340100 }, { "loss": 3.49375, "learning_rate": 7.321663942637212e-06, "epoch": 1.368623049350085, "total_flos": 1306158489633546240, "step": 340200 }, { "loss": 3.48375, "learning_rate": 7.3208528172412815e-06, "epoch": 1.3690253488942796, "total_flos": 1306538211586252800, "step": 340300 }, { "loss": 3.41, "learning_rate": 7.320041691845351e-06, "epoch": 1.3694276484384744, "total_flos": 1306927371616419840, "step": 340400 }, { "loss": 3.46, "learning_rate": 7.3192305664494216e-06, "epoch": 1.369829947982669, "total_flos": 1307312611949813760, "step": 340500 }, { "loss": 3.47125, "learning_rate": 7.31841944105349e-06, "epoch": 1.3702322475268636, "total_flos": 1307700869068800000, "step": 340600 }, { "loss": 3.4525, "learning_rate": 7.317608315657559e-06, "epoch": 1.3706345470710581, "total_flos": 1308090873586483200, "step": 340700 }, { "loss": 3.51, "learning_rate": 7.316797190261628e-06, "epoch": 1.3710368466152527, "total_flos": 1308483979869634560, "step": 340800 }, { "loss": 3.465, "learning_rate": 7.315986064865699e-06, "epoch": 1.3714391461594473, "total_flos": 1308878483009495040, "step": 340900 }, { "loss": 3.46125, "learning_rate": 7.315174939469768e-06, "epoch": 1.371841445703642, "total_flos": 1309254975726919680, "step": 341000 }, { "loss": 3.4875, "learning_rate": 7.314363814073837e-06, "epoch": 1.3722437452478367, "total_flos": 1309630592089374720, "step": 341100 }, { "loss": 3.4375, "learning_rate": 7.313552688677906e-06, "epoch": 1.3726460447920312, "total_flos": 1310003212911206400, "step": 341200 }, { "loss": 3.485, "learning_rate": 7.312741563281977e-06, "epoch": 1.3730483443362258, "total_flos": 1310392272027770880, "step": 341300 }, { "loss": 3.475, "learning_rate": 7.311930437886046e-06, "epoch": 1.3734506438804206, "total_flos": 1310780491968061440, "step": 341400 }, { "loss": 3.46375, "learning_rate": 7.311119312490114e-06, "epoch": 1.3738529434246152, "total_flos": 1311154870811074560, "step": 341500 }, { "loss": 3.45125, "learning_rate": 7.3103081870941836e-06, "epoch": 1.3742552429688097, "total_flos": 1311531251992412160, "step": 341600 }, { "loss": 3.45625, "learning_rate": 7.3094970616982545e-06, "epoch": 1.3746575425130043, "total_flos": 1311919965878231040, "step": 341700 }, { "loss": 3.48625, "learning_rate": 7.308685936302324e-06, "epoch": 1.3750598420571989, "total_flos": 1312301631745597440, "step": 341800 }, { "loss": 3.48875, "learning_rate": 7.307874810906392e-06, "epoch": 1.3754621416013935, "total_flos": 1312700957493411840, "step": 341900 }, { "loss": 3.47625, "learning_rate": 7.307063685510463e-06, "epoch": 1.3758644411455883, "total_flos": 1313071714069217280, "step": 342000 }, { "loss": 3.4725, "learning_rate": 7.306252560114532e-06, "epoch": 1.3762667406897828, "total_flos": 1313445561788006400, "step": 342100 }, { "loss": 3.4925, "learning_rate": 7.305441434718601e-06, "epoch": 1.3766690402339774, "total_flos": 1313824311783383040, "step": 342200 }, { "loss": 3.46625, "learning_rate": 7.30463030932267e-06, "epoch": 1.377071339778172, "total_flos": 1314191493893160960, "step": 342300 }, { "loss": 3.4375, "learning_rate": 7.3038191839267405e-06, "epoch": 1.3774736393223668, "total_flos": 1314578694074941440, "step": 342400 }, { "loss": 3.465, "learning_rate": 7.30300805853081e-06, "epoch": 1.3778759388665613, "total_flos": 1314954889362800640, "step": 342500 }, { "loss": 3.4475, "learning_rate": 7.302196933134879e-06, "epoch": 1.378278238410756, "total_flos": 1315342870297190400, "step": 342600 }, { "loss": 3.48375, "learning_rate": 7.301385807738947e-06, "epoch": 1.3786805379549505, "total_flos": 1315730532557045760, "step": 342700 }, { "loss": 3.44375, "learning_rate": 7.300574682343018e-06, "epoch": 1.379082837499145, "total_flos": 1316126299772559360, "step": 342800 }, { "loss": 3.4275, "learning_rate": 7.299763556947087e-06, "epoch": 1.3794851370433396, "total_flos": 1316502048916070400, "step": 342900 }, { "loss": 3.5425, "learning_rate": 7.2989524315511565e-06, "epoch": 1.3798874365875342, "total_flos": 1316908964429045760, "step": 343000 }, { "loss": 3.465, "learning_rate": 7.298141306155225e-06, "epoch": 1.380289736131729, "total_flos": 1317297322461634560, "step": 343100 }, { "loss": 3.4975, "learning_rate": 7.297330180759296e-06, "epoch": 1.3806920356759236, "total_flos": 1317681197805772800, "step": 343200 }, { "loss": 3.455, "learning_rate": 7.296519055363365e-06, "epoch": 1.3810943352201182, "total_flos": 1318065742366433280, "step": 343300 }, { "loss": 3.435, "learning_rate": 7.295707929967434e-06, "epoch": 1.381496634764313, "total_flos": 1318435022416896000, "step": 343400 }, { "loss": 3.50125, "learning_rate": 7.294896804571503e-06, "epoch": 1.3818989343085075, "total_flos": 1318822913060167680, "step": 343500 }, { "loss": 3.48875, "learning_rate": 7.294085679175573e-06, "epoch": 1.382301233852702, "total_flos": 1319206984920268800, "step": 343600 }, { "loss": 3.46375, "learning_rate": 7.293274553779643e-06, "epoch": 1.3827035333968967, "total_flos": 1319587880657510400, "step": 343700 }, { "loss": 3.44625, "learning_rate": 7.292463428383712e-06, "epoch": 1.3831058329410912, "total_flos": 1319974092948234240, "step": 343800 }, { "loss": 3.475, "learning_rate": 7.291652302987781e-06, "epoch": 1.3835081324852858, "total_flos": 1320348423990067200, "step": 343900 }, { "loss": 3.475, "learning_rate": 7.290841177591851e-06, "epoch": 1.3839104320294804, "total_flos": 1320727041204387840, "step": 344000 }, { "loss": 3.475, "learning_rate": 7.29003005219592e-06, "epoch": 1.3843127315736752, "total_flos": 1321116615511449600, "step": 344100 }, { "loss": 3.415, "learning_rate": 7.2892189267999894e-06, "epoch": 1.3847150311178698, "total_flos": 1321500990112358400, "step": 344200 }, { "loss": 3.5075, "learning_rate": 7.288407801404059e-06, "epoch": 1.3851173306620643, "total_flos": 1321876569296117760, "step": 344300 }, { "loss": 3.44625, "learning_rate": 7.287596676008129e-06, "epoch": 1.385519630206259, "total_flos": 1322247007197388800, "step": 344400 }, { "loss": 3.485, "learning_rate": 7.286785550612198e-06, "epoch": 1.3859219297504537, "total_flos": 1322632640562708480, "step": 344500 }, { "loss": 3.46, "learning_rate": 7.285974425216267e-06, "epoch": 1.3863242292946483, "total_flos": 1323033102916362240, "step": 344600 }, { "loss": 3.4775, "learning_rate": 7.285163299820336e-06, "epoch": 1.3867265288388428, "total_flos": 1323405845896765440, "step": 344700 }, { "loss": 3.495, "learning_rate": 7.284352174424406e-06, "epoch": 1.3871288283830374, "total_flos": 1323819065854279680, "step": 344800 }, { "loss": 3.4225, "learning_rate": 7.2835410490284755e-06, "epoch": 1.387531127927232, "total_flos": 1324183831348838400, "step": 344900 }, { "loss": 3.53875, "learning_rate": 7.282729923632545e-06, "epoch": 1.3879334274714266, "total_flos": 1324569948037201920, "step": 345000 }, { "loss": 3.47625, "learning_rate": 7.281918798236614e-06, "epoch": 1.3883357270156214, "total_flos": 1324958290136064000, "step": 345100 }, { "loss": 3.42125, "learning_rate": 7.281107672840684e-06, "epoch": 1.388738026559816, "total_flos": 1325337348183490560, "step": 345200 }, { "loss": 3.46125, "learning_rate": 7.280296547444753e-06, "epoch": 1.3891403261040105, "total_flos": 1325722004280238080, "step": 345300 }, { "loss": 3.43625, "learning_rate": 7.279485422048822e-06, "epoch": 1.389542625648205, "total_flos": 1326121632768860160, "step": 345400 }, { "loss": 3.475, "learning_rate": 7.2786742966528915e-06, "epoch": 1.3899449251923999, "total_flos": 1326491337718702080, "step": 345500 }, { "loss": 3.41, "learning_rate": 7.2778631712569616e-06, "epoch": 1.3903472247365944, "total_flos": 1326876997640232960, "step": 345600 }, { "loss": 3.4575, "learning_rate": 7.277052045861031e-06, "epoch": 1.390749524280789, "total_flos": 1327253973680701440, "step": 345700 }, { "loss": 3.4725, "learning_rate": 7.2762409204651e-06, "epoch": 1.3911518238249836, "total_flos": 1327635745772912640, "step": 345800 }, { "loss": 3.42875, "learning_rate": 7.275429795069169e-06, "epoch": 1.3915541233691782, "total_flos": 1328018527001149440, "step": 345900 }, { "loss": 3.415, "learning_rate": 7.274618669673239e-06, "epoch": 1.3919564229133727, "total_flos": 1328395311836897280, "step": 346000 }, { "loss": 3.4725, "learning_rate": 7.273807544277308e-06, "epoch": 1.3923587224575675, "total_flos": 1328796045063905280, "step": 346100 }, { "loss": 3.4375, "learning_rate": 7.272996418881378e-06, "epoch": 1.392761022001762, "total_flos": 1329176101624872960, "step": 346200 }, { "loss": 3.4325, "learning_rate": 7.272185293485447e-06, "epoch": 1.3931633215459567, "total_flos": 1329546682929684480, "step": 346300 }, { "loss": 3.495, "learning_rate": 7.271374168089517e-06, "epoch": 1.3935656210901513, "total_flos": 1329925746288353280, "step": 346400 }, { "loss": 3.4275, "learning_rate": 7.270563042693586e-06, "epoch": 1.393967920634346, "total_flos": 1330310253670318080, "step": 346500 }, { "loss": 3.44375, "learning_rate": 7.269751917297655e-06, "epoch": 1.3943702201785406, "total_flos": 1330692185099796480, "step": 346600 }, { "loss": 3.50625, "learning_rate": 7.268940791901725e-06, "epoch": 1.3947725197227352, "total_flos": 1331076485343313920, "step": 346700 }, { "loss": 3.43125, "learning_rate": 7.2681296665057945e-06, "epoch": 1.3951748192669298, "total_flos": 1331442323708805120, "step": 346800 }, { "loss": 3.43875, "learning_rate": 7.267318541109864e-06, "epoch": 1.3955771188111243, "total_flos": 1331823431895736320, "step": 346900 }, { "loss": 3.46375, "learning_rate": 7.266507415713933e-06, "epoch": 1.395979418355319, "total_flos": 1332210164688199680, "step": 347000 }, { "loss": 3.4525, "learning_rate": 7.265696290318003e-06, "epoch": 1.3963817178995135, "total_flos": 1332596722209669120, "step": 347100 }, { "loss": 3.49375, "learning_rate": 7.264885164922072e-06, "epoch": 1.3967840174437083, "total_flos": 1332973862898647040, "step": 347200 }, { "loss": 3.47375, "learning_rate": 7.264074039526141e-06, "epoch": 1.3971863169879029, "total_flos": 1333355279137628160, "step": 347300 }, { "loss": 3.4775, "learning_rate": 7.2632629141302105e-06, "epoch": 1.3975886165320974, "total_flos": 1333728335481323520, "step": 347400 }, { "loss": 3.41375, "learning_rate": 7.2624517887342805e-06, "epoch": 1.3979909160762922, "total_flos": 1334113873244282880, "step": 347500 }, { "loss": 3.46625, "learning_rate": 7.26164066333835e-06, "epoch": 1.3983932156204868, "total_flos": 1334502066628362240, "step": 347600 }, { "loss": 3.5025, "learning_rate": 7.260829537942419e-06, "epoch": 1.3987955151646814, "total_flos": 1334871102361681920, "step": 347700 }, { "loss": 3.5075, "learning_rate": 7.260018412546488e-06, "epoch": 1.399197814708876, "total_flos": 1335257867021598720, "step": 347800 }, { "loss": 3.46, "learning_rate": 7.259207287150558e-06, "epoch": 1.3996001142530705, "total_flos": 1335639192969461760, "step": 347900 }, { "loss": 3.45625, "learning_rate": 7.258396161754627e-06, "epoch": 1.400002413797265, "total_flos": 1336027051745280000, "step": 348000 }, { "loss": 3.42375, "learning_rate": 7.2575850363586965e-06, "epoch": 1.4004047133414597, "total_flos": 1336423004854272000, "step": 348100 }, { "loss": 3.43125, "learning_rate": 7.256773910962766e-06, "epoch": 1.4008070128856545, "total_flos": 1336811251350773760, "step": 348200 }, { "loss": 3.46875, "learning_rate": 7.255962785566836e-06, "epoch": 1.401209312429849, "total_flos": 1337180260527882240, "step": 348300 }, { "loss": 3.48875, "learning_rate": 7.255151660170905e-06, "epoch": 1.4016116119740436, "total_flos": 1337583670620979200, "step": 348400 }, { "loss": 3.4125, "learning_rate": 7.254340534774974e-06, "epoch": 1.4020139115182382, "total_flos": 1337959239182254080, "step": 348500 }, { "loss": 3.43625, "learning_rate": 7.253529409379043e-06, "epoch": 1.402416211062433, "total_flos": 1338340373925396480, "step": 348600 }, { "loss": 3.42, "learning_rate": 7.252718283983113e-06, "epoch": 1.4028185106066275, "total_flos": 1338714768702136320, "step": 348700 }, { "loss": 3.48125, "learning_rate": 7.251907158587183e-06, "epoch": 1.4032208101508221, "total_flos": 1339089375928565760, "step": 348800 }, { "loss": 3.48, "learning_rate": 7.251096033191252e-06, "epoch": 1.4036231096950167, "total_flos": 1339483709108674560, "step": 348900 }, { "loss": 3.4125, "learning_rate": 7.250284907795321e-06, "epoch": 1.4040254092392113, "total_flos": 1339872348637102080, "step": 349000 }, { "loss": 3.4275, "learning_rate": 7.249473782399391e-06, "epoch": 1.4044277087834058, "total_flos": 1340253648028753920, "step": 349100 }, { "loss": 3.42125, "learning_rate": 7.24866265700346e-06, "epoch": 1.4048300083276006, "total_flos": 1340627931269406720, "step": 349200 }, { "loss": 3.43, "learning_rate": 7.2478515316075294e-06, "epoch": 1.4052323078717952, "total_flos": 1341016161832181760, "step": 349300 }, { "loss": 3.44125, "learning_rate": 7.247040406211599e-06, "epoch": 1.4056346074159898, "total_flos": 1341391868485754880, "step": 349400 }, { "loss": 3.45, "learning_rate": 7.246229280815669e-06, "epoch": 1.4060369069601844, "total_flos": 1341781219720642560, "step": 349500 }, { "loss": 3.44625, "learning_rate": 7.245418155419738e-06, "epoch": 1.4064392065043791, "total_flos": 1342160044073410560, "step": 349600 }, { "loss": 3.45125, "learning_rate": 7.244607030023807e-06, "epoch": 1.4068415060485737, "total_flos": 1342541582470963200, "step": 349700 }, { "loss": 3.45875, "learning_rate": 7.243795904627876e-06, "epoch": 1.4072438055927683, "total_flos": 1342928384309575680, "step": 349800 }, { "loss": 3.48625, "learning_rate": 7.242984779231946e-06, "epoch": 1.4076461051369629, "total_flos": 1343331539463045120, "step": 349900 }, { "loss": 3.50375, "learning_rate": 7.2421736538360155e-06, "epoch": 1.4080484046811574, "total_flos": 1343704797633945600, "step": 350000 }, { "loss": 3.49625, "learning_rate": 7.241362528440085e-06, "epoch": 1.408450704225352, "total_flos": 1344076807662919680, "step": 350100 }, { "loss": 3.425, "learning_rate": 7.240551403044154e-06, "epoch": 1.4088530037695468, "total_flos": 1344453889928232960, "step": 350200 }, { "loss": 3.3875, "learning_rate": 7.239740277648224e-06, "epoch": 1.4092553033137414, "total_flos": 1344846512888340480, "step": 350300 }, { "loss": 3.39625, "learning_rate": 7.238929152252293e-06, "epoch": 1.409657602857936, "total_flos": 1345233952076021760, "step": 350400 }, { "loss": 3.46625, "learning_rate": 7.238118026856362e-06, "epoch": 1.4100599024021305, "total_flos": 1345634175423774720, "step": 350500 }, { "loss": 3.40375, "learning_rate": 7.2373069014604315e-06, "epoch": 1.4104622019463253, "total_flos": 1346019139572572160, "step": 350600 }, { "loss": 3.46375, "learning_rate": 7.2364957760645016e-06, "epoch": 1.41086450149052, "total_flos": 1346394840914903040, "step": 350700 }, { "loss": 3.4175, "learning_rate": 7.235684650668571e-06, "epoch": 1.4112668010347145, "total_flos": 1346756536511447040, "step": 350800 }, { "loss": 3.47125, "learning_rate": 7.23487352527264e-06, "epoch": 1.411669100578909, "total_flos": 1347139891353845760, "step": 350900 }, { "loss": 3.45625, "learning_rate": 7.234062399876709e-06, "epoch": 1.4120714001231036, "total_flos": 1347512480308224000, "step": 351000 }, { "loss": 3.47375, "learning_rate": 7.233251274480779e-06, "epoch": 1.4124736996672982, "total_flos": 1347906893156966400, "step": 351100 }, { "loss": 3.46625, "learning_rate": 7.232440149084848e-06, "epoch": 1.4128759992114928, "total_flos": 1348290927838371840, "step": 351200 }, { "loss": 3.48625, "learning_rate": 7.231629023688918e-06, "epoch": 1.4132782987556876, "total_flos": 1348690152672583680, "step": 351300 }, { "loss": 3.46875, "learning_rate": 7.230817898292988e-06, "epoch": 1.4136805982998821, "total_flos": 1349087173341265920, "step": 351400 }, { "loss": 3.4175, "learning_rate": 7.230006772897057e-06, "epoch": 1.4140828978440767, "total_flos": 1349489414961070080, "step": 351500 }, { "loss": 3.46125, "learning_rate": 7.229195647501126e-06, "epoch": 1.4144851973882713, "total_flos": 1349879095492976640, "step": 351600 }, { "loss": 3.44625, "learning_rate": 7.228384522105195e-06, "epoch": 1.414887496932466, "total_flos": 1350268595442647040, "step": 351700 }, { "loss": 3.3925, "learning_rate": 7.227573396709265e-06, "epoch": 1.4152897964766606, "total_flos": 1350649586782248960, "step": 351800 }, { "loss": 3.485, "learning_rate": 7.2267622713133345e-06, "epoch": 1.4156920960208552, "total_flos": 1351036940990054400, "step": 351900 }, { "loss": 3.485, "learning_rate": 7.225951145917404e-06, "epoch": 1.4160943955650498, "total_flos": 1351435661256253440, "step": 352000 }, { "loss": 3.415, "learning_rate": 7.225140020521473e-06, "epoch": 1.4164966951092444, "total_flos": 1351833101513072640, "step": 352100 }, { "loss": 3.42625, "learning_rate": 7.224328895125543e-06, "epoch": 1.416898994653439, "total_flos": 1352224980899266560, "step": 352200 }, { "loss": 3.4775, "learning_rate": 7.223517769729612e-06, "epoch": 1.4173012941976337, "total_flos": 1352606620210421760, "step": 352300 }, { "loss": 3.49375, "learning_rate": 7.222706644333681e-06, "epoch": 1.4177035937418283, "total_flos": 1352977089979146240, "step": 352400 }, { "loss": 3.46625, "learning_rate": 7.2218955189377505e-06, "epoch": 1.4181058932860229, "total_flos": 1353357066871480320, "step": 352500 }, { "loss": 3.39625, "learning_rate": 7.2210843935418205e-06, "epoch": 1.4185081928302175, "total_flos": 1353753258986373120, "step": 352600 }, { "loss": 3.46, "learning_rate": 7.22027326814589e-06, "epoch": 1.4189104923744122, "total_flos": 1354145828834058240, "step": 352700 }, { "loss": 3.4575, "learning_rate": 7.219462142749959e-06, "epoch": 1.4193127919186068, "total_flos": 1354519883691294720, "step": 352800 }, { "loss": 3.47125, "learning_rate": 7.218651017354028e-06, "epoch": 1.4197150914628014, "total_flos": 1354886922397532160, "step": 352900 }, { "loss": 3.43625, "learning_rate": 7.217839891958098e-06, "epoch": 1.420117391006996, "total_flos": 1355274292539064320, "step": 353000 }, { "loss": 3.4925, "learning_rate": 7.217028766562167e-06, "epoch": 1.4205196905511905, "total_flos": 1355658932702085120, "step": 353100 }, { "loss": 3.45375, "learning_rate": 7.2162176411662365e-06, "epoch": 1.4209219900953851, "total_flos": 1356037379956654080, "step": 353200 }, { "loss": 3.425, "learning_rate": 7.215406515770306e-06, "epoch": 1.42132428963958, "total_flos": 1356410590326374400, "step": 353300 }, { "loss": 3.445, "learning_rate": 7.214595390374376e-06, "epoch": 1.4217265891837745, "total_flos": 1356786806859202560, "step": 353400 }, { "loss": 3.41875, "learning_rate": 7.213784264978445e-06, "epoch": 1.422128888727969, "total_flos": 1357180125592043520, "step": 353500 }, { "loss": 3.435, "learning_rate": 7.212973139582514e-06, "epoch": 1.4225311882721636, "total_flos": 1357543388005048320, "step": 353600 }, { "loss": 3.4425, "learning_rate": 7.212162014186583e-06, "epoch": 1.4229334878163584, "total_flos": 1357917862450421760, "step": 353700 }, { "loss": 3.43375, "learning_rate": 7.211350888790653e-06, "epoch": 1.423335787360553, "total_flos": 1358299910727229440, "step": 353800 }, { "loss": 3.44875, "learning_rate": 7.210539763394723e-06, "epoch": 1.4237380869047476, "total_flos": 1358680700239626240, "step": 353900 }, { "loss": 3.455, "learning_rate": 7.209728637998792e-06, "epoch": 1.4241403864489421, "total_flos": 1359071384596316160, "step": 354000 }, { "loss": 3.44625, "learning_rate": 7.208917512602861e-06, "epoch": 1.4245426859931367, "total_flos": 1359463800417976320, "step": 354100 }, { "loss": 3.395, "learning_rate": 7.208106387206931e-06, "epoch": 1.4249449855373313, "total_flos": 1359834700397322240, "step": 354200 }, { "loss": 3.46875, "learning_rate": 7.207295261811e-06, "epoch": 1.425347285081526, "total_flos": 1360207836409651200, "step": 354300 }, { "loss": 3.40375, "learning_rate": 7.2064841364150694e-06, "epoch": 1.4257495846257207, "total_flos": 1360590824776335360, "step": 354400 }, { "loss": 3.43875, "learning_rate": 7.205673011019139e-06, "epoch": 1.4261518841699152, "total_flos": 1360979103140290560, "step": 354500 }, { "loss": 3.46125, "learning_rate": 7.204861885623209e-06, "epoch": 1.4265541837141098, "total_flos": 1361384329678233600, "step": 354600 }, { "loss": 3.4975, "learning_rate": 7.204050760227278e-06, "epoch": 1.4269564832583046, "total_flos": 1361781621220270080, "step": 354700 }, { "loss": 3.4725, "learning_rate": 7.203239634831347e-06, "epoch": 1.4273587828024992, "total_flos": 1362158384811048960, "step": 354800 }, { "loss": 3.4525, "learning_rate": 7.202428509435416e-06, "epoch": 1.4277610823466937, "total_flos": 1362538653821706240, "step": 354900 }, { "loss": 3.4275, "learning_rate": 7.201617384039486e-06, "epoch": 1.4281633818908883, "total_flos": 1362939737590702080, "step": 355000 }, { "loss": 3.405, "learning_rate": 7.2008062586435555e-06, "epoch": 1.428565681435083, "total_flos": 1363337554945720320, "step": 355100 }, { "loss": 3.42375, "learning_rate": 7.199995133247625e-06, "epoch": 1.4289679809792775, "total_flos": 1363727277967564800, "step": 355200 }, { "loss": 3.4675, "learning_rate": 7.199184007851694e-06, "epoch": 1.429370280523472, "total_flos": 1364112507678474240, "step": 355300 }, { "loss": 3.4125, "learning_rate": 7.198372882455764e-06, "epoch": 1.4297725800676668, "total_flos": 1364494816206151680, "step": 355400 }, { "loss": 3.46375, "learning_rate": 7.197561757059833e-06, "epoch": 1.4301748796118614, "total_flos": 1364880284922961920, "step": 355500 }, { "loss": 3.415, "learning_rate": 7.196750631663902e-06, "epoch": 1.430577179156056, "total_flos": 1365276376124252160, "step": 355600 }, { "loss": 3.42, "learning_rate": 7.1959395062679715e-06, "epoch": 1.4309794787002506, "total_flos": 1365665440552058880, "step": 355700 }, { "loss": 3.4275, "learning_rate": 7.1951283808720416e-06, "epoch": 1.4313817782444453, "total_flos": 1366062620558008320, "step": 355800 }, { "loss": 3.47875, "learning_rate": 7.194317255476111e-06, "epoch": 1.43178407778864, "total_flos": 1366444966264381440, "step": 355900 }, { "loss": 3.46, "learning_rate": 7.19350613008018e-06, "epoch": 1.4321863773328345, "total_flos": 1366824194271559680, "step": 356000 }, { "loss": 3.39625, "learning_rate": 7.19269500468425e-06, "epoch": 1.432588676877029, "total_flos": 1367208770699673600, "step": 356100 }, { "loss": 3.41875, "learning_rate": 7.191883879288319e-06, "epoch": 1.4329909764212236, "total_flos": 1367613790099169280, "step": 356200 }, { "loss": 3.3575, "learning_rate": 7.191072753892388e-06, "epoch": 1.4333932759654182, "total_flos": 1368007411572817920, "step": 356300 }, { "loss": 3.42625, "learning_rate": 7.190261628496458e-06, "epoch": 1.433795575509613, "total_flos": 1368388625984593920, "step": 356400 }, { "loss": 3.36, "learning_rate": 7.189450503100528e-06, "epoch": 1.4341978750538076, "total_flos": 1368782167789608960, "step": 356500 }, { "loss": 3.43, "learning_rate": 7.188639377704597e-06, "epoch": 1.4346001745980022, "total_flos": 1369164439138590720, "step": 356600 }, { "loss": 3.42875, "learning_rate": 7.187828252308666e-06, "epoch": 1.4350024741421967, "total_flos": 1369550810766581760, "step": 356700 }, { "loss": 3.425, "learning_rate": 7.187017126912735e-06, "epoch": 1.4354047736863915, "total_flos": 1369926257169285120, "step": 356800 }, { "loss": 3.42375, "learning_rate": 7.186206001516805e-06, "epoch": 1.435807073230586, "total_flos": 1370298548694097920, "step": 356900 }, { "loss": 3.445, "learning_rate": 7.1853948761208745e-06, "epoch": 1.4362093727747807, "total_flos": 1370682094741217280, "step": 357000 }, { "loss": 3.465, "learning_rate": 7.184583750724944e-06, "epoch": 1.4366116723189752, "total_flos": 1371078339968532480, "step": 357100 }, { "loss": 3.4125, "learning_rate": 7.183772625329013e-06, "epoch": 1.4370139718631698, "total_flos": 1371455326631485440, "step": 357200 }, { "loss": 3.4475, "learning_rate": 7.182961499933083e-06, "epoch": 1.4374162714073644, "total_flos": 1371848119551344640, "step": 357300 }, { "loss": 3.4325, "learning_rate": 7.182150374537152e-06, "epoch": 1.4378185709515592, "total_flos": 1372246696414003200, "step": 357400 }, { "loss": 3.41625, "learning_rate": 7.181339249141221e-06, "epoch": 1.4382208704957538, "total_flos": 1372630173414973440, "step": 357500 }, { "loss": 3.415, "learning_rate": 7.1805281237452905e-06, "epoch": 1.4386231700399483, "total_flos": 1373020629388247040, "step": 357600 }, { "loss": 3.43625, "learning_rate": 7.1797169983493605e-06, "epoch": 1.439025469584143, "total_flos": 1373410973825433600, "step": 357700 }, { "loss": 3.415, "learning_rate": 7.17890587295343e-06, "epoch": 1.4394277691283377, "total_flos": 1373795603365969920, "step": 357800 }, { "loss": 3.41375, "learning_rate": 7.178094747557499e-06, "epoch": 1.4398300686725323, "total_flos": 1374175686483148800, "step": 357900 }, { "loss": 3.3825, "learning_rate": 7.177283622161568e-06, "epoch": 1.4402323682167268, "total_flos": 1374582278010347520, "step": 358000 }, { "loss": 3.4375, "learning_rate": 7.176472496765638e-06, "epoch": 1.4406346677609214, "total_flos": 1374979681088471040, "step": 358100 }, { "loss": 3.41, "learning_rate": 7.175661371369707e-06, "epoch": 1.441036967305116, "total_flos": 1375369165104414720, "step": 358200 }, { "loss": 3.3875, "learning_rate": 7.1748502459737765e-06, "epoch": 1.4414392668493106, "total_flos": 1375753969915944960, "step": 358300 }, { "loss": 3.375, "learning_rate": 7.174039120577846e-06, "epoch": 1.4418415663935054, "total_flos": 1376137223844741120, "step": 358400 }, { "loss": 3.455, "learning_rate": 7.173227995181916e-06, "epoch": 1.4422438659377, "total_flos": 1376515819814092800, "step": 358500 }, { "loss": 3.4225, "learning_rate": 7.172416869785985e-06, "epoch": 1.4426461654818945, "total_flos": 1376899939475374080, "step": 358600 }, { "loss": 3.43875, "learning_rate": 7.171605744390054e-06, "epoch": 1.443048465026089, "total_flos": 1377287580490260480, "step": 358700 }, { "loss": 3.43, "learning_rate": 7.170794618994123e-06, "epoch": 1.4434507645702839, "total_flos": 1377678849083596800, "step": 358800 }, { "loss": 3.46, "learning_rate": 7.169983493598193e-06, "epoch": 1.4438530641144784, "total_flos": 1378053589091082240, "step": 358900 }, { "loss": 3.39125, "learning_rate": 7.169172368202263e-06, "epoch": 1.444255363658673, "total_flos": 1378432689628446720, "step": 359000 }, { "loss": 3.415, "learning_rate": 7.168361242806332e-06, "epoch": 1.4446576632028676, "total_flos": 1378816639329976320, "step": 359100 }, { "loss": 3.39375, "learning_rate": 7.167550117410401e-06, "epoch": 1.4450599627470622, "total_flos": 1379209336647475200, "step": 359200 }, { "loss": 3.415, "learning_rate": 7.166738992014471e-06, "epoch": 1.4454622622912567, "total_flos": 1379579519609118720, "step": 359300 }, { "loss": 3.40875, "learning_rate": 7.16592786661854e-06, "epoch": 1.4458645618354513, "total_flos": 1379971292770467840, "step": 359400 }, { "loss": 3.4825, "learning_rate": 7.1651167412226094e-06, "epoch": 1.446266861379646, "total_flos": 1380348337857085440, "step": 359500 }, { "loss": 3.425, "learning_rate": 7.164305615826679e-06, "epoch": 1.4466691609238407, "total_flos": 1380729222971842560, "step": 359600 }, { "loss": 3.4325, "learning_rate": 7.163494490430749e-06, "epoch": 1.4470714604680353, "total_flos": 1381128368137420800, "step": 359700 }, { "loss": 3.44375, "learning_rate": 7.162683365034818e-06, "epoch": 1.4474737600122298, "total_flos": 1381514522004480000, "step": 359800 }, { "loss": 3.3925, "learning_rate": 7.161872239638887e-06, "epoch": 1.4478760595564246, "total_flos": 1381903984775454720, "step": 359900 }, { "loss": 3.43125, "learning_rate": 7.161061114242956e-06, "epoch": 1.4482783591006192, "total_flos": 1382304484307804160, "step": 360000 }, { "loss": 3.3975, "learning_rate": 7.160249988847026e-06, "epoch": 1.4486806586448138, "total_flos": 1382699789445242880, "step": 360100 }, { "loss": 3.49875, "learning_rate": 7.1594388634510955e-06, "epoch": 1.4490829581890083, "total_flos": 1383086161073233920, "step": 360200 }, { "loss": 3.40375, "learning_rate": 7.158627738055165e-06, "epoch": 1.449485257733203, "total_flos": 1383468427110973440, "step": 360300 }, { "loss": 3.43625, "learning_rate": 7.157816612659234e-06, "epoch": 1.4498875572773975, "total_flos": 1383863965943070720, "step": 360400 }, { "loss": 3.47, "learning_rate": 7.157005487263304e-06, "epoch": 1.4502898568215923, "total_flos": 1384248914158141440, "step": 360500 }, { "loss": 3.35875, "learning_rate": 7.156194361867373e-06, "epoch": 1.4506921563657869, "total_flos": 1384642827750113280, "step": 360600 }, { "loss": 3.47875, "learning_rate": 7.155383236471442e-06, "epoch": 1.4510944559099814, "total_flos": 1385021136912384000, "step": 360700 }, { "loss": 3.37375, "learning_rate": 7.1545721110755115e-06, "epoch": 1.451496755454176, "total_flos": 1385395276749496320, "step": 360800 }, { "loss": 3.39375, "learning_rate": 7.1537609856795816e-06, "epoch": 1.4518990549983708, "total_flos": 1385776294645309440, "step": 360900 }, { "loss": 3.42, "learning_rate": 7.152949860283651e-06, "epoch": 1.4523013545425654, "total_flos": 1386157785241681920, "step": 361000 }, { "loss": 3.39625, "learning_rate": 7.15213873488772e-06, "epoch": 1.45270365408676, "total_flos": 1386548809517875200, "step": 361100 }, { "loss": 3.39375, "learning_rate": 7.151327609491791e-06, "epoch": 1.4531059536309545, "total_flos": 1386908635557150720, "step": 361200 }, { "loss": 3.41875, "learning_rate": 7.15051648409586e-06, "epoch": 1.453508253175149, "total_flos": 1387313437195714560, "step": 361300 }, { "loss": 3.4375, "learning_rate": 7.149705358699928e-06, "epoch": 1.4539105527193437, "total_flos": 1387694991526993920, "step": 361400 }, { "loss": 3.425, "learning_rate": 7.148894233303998e-06, "epoch": 1.4543128522635385, "total_flos": 1388079706047406080, "step": 361500 }, { "loss": 3.41875, "learning_rate": 7.1480831079080685e-06, "epoch": 1.454715151807733, "total_flos": 1388459236795392000, "step": 361600 }, { "loss": 3.4675, "learning_rate": 7.147271982512138e-06, "epoch": 1.4551174513519276, "total_flos": 1388846049256488960, "step": 361700 }, { "loss": 3.4, "learning_rate": 7.146460857116206e-06, "epoch": 1.4555197508961222, "total_flos": 1389226121751183360, "step": 361800 }, { "loss": 3.4375, "learning_rate": 7.145649731720275e-06, "epoch": 1.455922050440317, "total_flos": 1389606725370101760, "step": 361900 }, { "loss": 3.445, "learning_rate": 7.144838606324346e-06, "epoch": 1.4563243499845115, "total_flos": 1390005222564126720, "step": 362000 }, { "loss": 3.39375, "learning_rate": 7.144027480928415e-06, "epoch": 1.4567266495287061, "total_flos": 1390369818098933760, "step": 362100 }, { "loss": 3.38875, "learning_rate": 7.143216355532484e-06, "epoch": 1.4571289490729007, "total_flos": 1390741010196602880, "step": 362200 }, { "loss": 3.43, "learning_rate": 7.142405230136553e-06, "epoch": 1.4575312486170953, "total_flos": 1391130611059875840, "step": 362300 }, { "loss": 3.42375, "learning_rate": 7.141594104740624e-06, "epoch": 1.4579335481612898, "total_flos": 1391515176865505280, "step": 362400 }, { "loss": 3.4375, "learning_rate": 7.140782979344693e-06, "epoch": 1.4583358477054844, "total_flos": 1391906201141698560, "step": 362500 }, { "loss": 3.37125, "learning_rate": 7.139971853948761e-06, "epoch": 1.4587381472496792, "total_flos": 1392288599960494080, "step": 362600 }, { "loss": 3.47, "learning_rate": 7.1391607285528305e-06, "epoch": 1.4591404467938738, "total_flos": 1392667456180715520, "step": 362700 }, { "loss": 3.41125, "learning_rate": 7.138349603156901e-06, "epoch": 1.4595427463380684, "total_flos": 1393057949332684800, "step": 362800 }, { "loss": 3.36375, "learning_rate": 7.1375384777609706e-06, "epoch": 1.4599450458822631, "total_flos": 1393443518963097600, "step": 362900 }, { "loss": 3.46125, "learning_rate": 7.136727352365039e-06, "epoch": 1.4603473454264577, "total_flos": 1393816708087848960, "step": 363000 }, { "loss": 3.39375, "learning_rate": 7.135916226969108e-06, "epoch": 1.4607496449706523, "total_flos": 1394210749149634560, "step": 363100 }, { "loss": 3.405, "learning_rate": 7.135105101573179e-06, "epoch": 1.4611519445148469, "total_flos": 1394586408002027520, "step": 363200 }, { "loss": 3.42125, "learning_rate": 7.134293976177248e-06, "epoch": 1.4615542440590414, "total_flos": 1394973974659522560, "step": 363300 }, { "loss": 3.4375, "learning_rate": 7.1334828507813165e-06, "epoch": 1.461956543603236, "total_flos": 1395362497340620800, "step": 363400 }, { "loss": 3.41625, "learning_rate": 7.132671725385386e-06, "epoch": 1.4623588431474306, "total_flos": 1395753048916254720, "step": 363500 }, { "loss": 3.4325, "learning_rate": 7.131860599989457e-06, "epoch": 1.4627611426916254, "total_flos": 1396142771938099200, "step": 363600 }, { "loss": 3.38875, "learning_rate": 7.131049474593526e-06, "epoch": 1.46316344223582, "total_flos": 1396528224721182720, "step": 363700 }, { "loss": 3.39625, "learning_rate": 7.130238349197595e-06, "epoch": 1.4635657417800145, "total_flos": 1396908329083330560, "step": 363800 }, { "loss": 3.44875, "learning_rate": 7.129427223801663e-06, "epoch": 1.463968041324209, "total_flos": 1397286856006533120, "step": 363900 }, { "loss": 3.44125, "learning_rate": 7.128616098405734e-06, "epoch": 1.464370340868404, "total_flos": 1397677306668564480, "step": 364000 }, { "loss": 3.4525, "learning_rate": 7.1278049730098035e-06, "epoch": 1.4647726404125985, "total_flos": 1398046969128468480, "step": 364100 }, { "loss": 3.3925, "learning_rate": 7.126993847613873e-06, "epoch": 1.465174939956793, "total_flos": 1398424566584279040, "step": 364200 }, { "loss": 3.37125, "learning_rate": 7.126182722217941e-06, "epoch": 1.4655772395009876, "total_flos": 1398808399438479360, "step": 364300 }, { "loss": 3.43, "learning_rate": 7.125371596822012e-06, "epoch": 1.4659795390451822, "total_flos": 1399198356154982400, "step": 364400 }, { "loss": 3.4575, "learning_rate": 7.124560471426081e-06, "epoch": 1.4663818385893768, "total_flos": 1399588689969684480, "step": 364500 }, { "loss": 3.41875, "learning_rate": 7.12374934603015e-06, "epoch": 1.4667841381335716, "total_flos": 1399970610776678400, "step": 364600 }, { "loss": 3.40875, "learning_rate": 7.122938220634219e-06, "epoch": 1.4671864376777661, "total_flos": 1400359043166658560, "step": 364700 }, { "loss": 3.42125, "learning_rate": 7.1221270952382895e-06, "epoch": 1.4675887372219607, "total_flos": 1400758246755901440, "step": 364800 }, { "loss": 3.41, "learning_rate": 7.121315969842359e-06, "epoch": 1.4679910367661553, "total_flos": 1401138436097925120, "step": 364900 }, { "loss": 3.395, "learning_rate": 7.120504844446428e-06, "epoch": 1.46839333631035, "total_flos": 1401510791357644800, "step": 365000 }, { "loss": 3.3675, "learning_rate": 7.119693719050496e-06, "epoch": 1.4687956358545446, "total_flos": 1401882084368916480, "step": 365100 }, { "loss": 3.42625, "learning_rate": 7.118882593654567e-06, "epoch": 1.4691979353987392, "total_flos": 1402273873463992320, "step": 365200 }, { "loss": 3.4425, "learning_rate": 7.118071468258636e-06, "epoch": 1.4696002349429338, "total_flos": 1402661163936890880, "step": 365300 }, { "loss": 3.43375, "learning_rate": 7.1172603428627055e-06, "epoch": 1.4700025344871284, "total_flos": 1403063718919987200, "step": 365400 }, { "loss": 3.37375, "learning_rate": 7.116449217466774e-06, "epoch": 1.470404834031323, "total_flos": 1403416704079257600, "step": 365500 }, { "loss": 3.43, "learning_rate": 7.115638092070845e-06, "epoch": 1.4708071335755177, "total_flos": 1403804106088243200, "step": 365600 }, { "loss": 3.4225, "learning_rate": 7.114826966674914e-06, "epoch": 1.4712094331197123, "total_flos": 1404214208346562560, "step": 365700 }, { "loss": 3.42, "learning_rate": 7.114015841278983e-06, "epoch": 1.4716117326639069, "total_flos": 1404594169305169920, "step": 365800 }, { "loss": 3.3975, "learning_rate": 7.113204715883053e-06, "epoch": 1.4720140322081015, "total_flos": 1404993946508574720, "step": 365900 }, { "loss": 3.4325, "learning_rate": 7.112393590487122e-06, "epoch": 1.4724163317522962, "total_flos": 1405379388669173760, "step": 366000 }, { "loss": 3.38125, "learning_rate": 7.111582465091192e-06, "epoch": 1.4728186312964908, "total_flos": 1405762940027535360, "step": 366100 }, { "loss": 3.3725, "learning_rate": 7.110771339695261e-06, "epoch": 1.4732209308406854, "total_flos": 1406156492455034880, "step": 366200 }, { "loss": 3.36375, "learning_rate": 7.109960214299331e-06, "epoch": 1.47362323038488, "total_flos": 1406534498876497920, "step": 366300 }, { "loss": 3.37125, "learning_rate": 7.1091490889034e-06, "epoch": 1.4740255299290745, "total_flos": 1406919994149519360, "step": 366400 }, { "loss": 3.39, "learning_rate": 7.108337963507469e-06, "epoch": 1.4744278294732691, "total_flos": 1407290968486256640, "step": 366500 }, { "loss": 3.38625, "learning_rate": 7.1075268381115384e-06, "epoch": 1.4748301290174637, "total_flos": 1407671545548963840, "step": 366600 }, { "loss": 3.4475, "learning_rate": 7.1067157127156085e-06, "epoch": 1.4752324285616585, "total_flos": 1408050810734837760, "step": 366700 }, { "loss": 3.395, "learning_rate": 7.105904587319678e-06, "epoch": 1.475634728105853, "total_flos": 1408438743868047360, "step": 366800 }, { "loss": 3.36625, "learning_rate": 7.105093461923747e-06, "epoch": 1.4760370276500476, "total_flos": 1408833432901386240, "step": 366900 }, { "loss": 3.415, "learning_rate": 7.104282336527816e-06, "epoch": 1.4764393271942424, "total_flos": 1409214163990118400, "step": 367000 }, { "loss": 3.39, "learning_rate": 7.103471211131886e-06, "epoch": 1.476841626738437, "total_flos": 1409588224158597120, "step": 367100 }, { "loss": 3.38375, "learning_rate": 7.102660085735955e-06, "epoch": 1.4772439262826316, "total_flos": 1409973384823357440, "step": 367200 }, { "loss": 3.37875, "learning_rate": 7.1018489603400245e-06, "epoch": 1.4776462258268261, "total_flos": 1410378255508070400, "step": 367300 }, { "loss": 3.37875, "learning_rate": 7.101037834944094e-06, "epoch": 1.4780485253710207, "total_flos": 1410750764793815040, "step": 367400 }, { "loss": 3.39625, "learning_rate": 7.100226709548164e-06, "epoch": 1.4784508249152153, "total_flos": 1411112391344209920, "step": 367500 }, { "loss": 3.44125, "learning_rate": 7.099415584152233e-06, "epoch": 1.4788531244594099, "total_flos": 1411489308961013760, "step": 367600 }, { "loss": 3.44, "learning_rate": 7.098604458756302e-06, "epoch": 1.4792554240036047, "total_flos": 1411874336844718080, "step": 367700 }, { "loss": 3.335, "learning_rate": 7.097793333360371e-06, "epoch": 1.4796577235477992, "total_flos": 1412257245542768640, "step": 367800 }, { "loss": 3.41625, "learning_rate": 7.096982207964441e-06, "epoch": 1.4800600230919938, "total_flos": 1412660453808660480, "step": 367900 }, { "loss": 3.43625, "learning_rate": 7.0961710825685106e-06, "epoch": 1.4804623226361884, "total_flos": 1413047802705223680, "step": 368000 }, { "loss": 3.42375, "learning_rate": 7.09535995717258e-06, "epoch": 1.4808646221803832, "total_flos": 1413434631100047360, "step": 368100 }, { "loss": 3.4225, "learning_rate": 7.094548831776649e-06, "epoch": 1.4812669217245777, "total_flos": 1413814666416046080, "step": 368200 }, { "loss": 3.41, "learning_rate": 7.093737706380719e-06, "epoch": 1.4816692212687723, "total_flos": 1414212064182927360, "step": 368300 }, { "loss": 3.41125, "learning_rate": 7.092926580984788e-06, "epoch": 1.482071520812967, "total_flos": 1414591324057559040, "step": 368400 }, { "loss": 3.40125, "learning_rate": 7.092115455588857e-06, "epoch": 1.4824738203571615, "total_flos": 1414983601786920960, "step": 368500 }, { "loss": 3.36625, "learning_rate": 7.091304330192927e-06, "epoch": 1.482876119901356, "total_flos": 1415378073059328000, "step": 368600 }, { "loss": 3.455, "learning_rate": 7.090493204796997e-06, "epoch": 1.4832784194455508, "total_flos": 1415769192937881600, "step": 368700 }, { "loss": 3.4475, "learning_rate": 7.089682079401066e-06, "epoch": 1.4836807189897454, "total_flos": 1416165539078799360, "step": 368800 }, { "loss": 3.4075, "learning_rate": 7.088870954005135e-06, "epoch": 1.48408301853394, "total_flos": 1416538536998830080, "step": 368900 }, { "loss": 3.45125, "learning_rate": 7.088059828609204e-06, "epoch": 1.4844853180781346, "total_flos": 1416917483510169600, "step": 369000 }, { "loss": 3.36875, "learning_rate": 7.087248703213274e-06, "epoch": 1.4848876176223293, "total_flos": 1417300981756108800, "step": 369100 }, { "loss": 3.36875, "learning_rate": 7.0864375778173435e-06, "epoch": 1.485289917166524, "total_flos": 1417675982014464000, "step": 369200 }, { "loss": 3.41375, "learning_rate": 7.085626452421413e-06, "epoch": 1.4856922167107185, "total_flos": 1418068966139043840, "step": 369300 }, { "loss": 3.42375, "learning_rate": 7.084815327025482e-06, "epoch": 1.486094516254913, "total_flos": 1418466050542632960, "step": 369400 }, { "loss": 3.44, "learning_rate": 7.084004201629552e-06, "epoch": 1.4864968157991076, "total_flos": 1418853383505469440, "step": 369500 }, { "loss": 3.395, "learning_rate": 7.083193076233621e-06, "epoch": 1.4868991153433022, "total_flos": 1419232505287802880, "step": 369600 }, { "loss": 3.39, "learning_rate": 7.08238195083769e-06, "epoch": 1.487301414887497, "total_flos": 1419628538065428480, "step": 369700 }, { "loss": 3.39, "learning_rate": 7.0815708254417595e-06, "epoch": 1.4877037144316916, "total_flos": 1420015233679196160, "step": 369800 }, { "loss": 3.36875, "learning_rate": 7.0807597000458295e-06, "epoch": 1.4881060139758862, "total_flos": 1420395019366809600, "step": 369900 }, { "loss": 3.4125, "learning_rate": 7.079948574649899e-06, "epoch": 1.4885083135200807, "total_flos": 1420801313464442880, "step": 370000 }, { "loss": 3.46625, "learning_rate": 7.079137449253968e-06, "epoch": 1.4889106130642755, "total_flos": 1421193973603246080, "step": 370100 }, { "loss": 3.4075, "learning_rate": 7.078326323858037e-06, "epoch": 1.48931291260847, "total_flos": 1421584721694842880, "step": 370200 }, { "loss": 3.4025, "learning_rate": 7.077515198462107e-06, "epoch": 1.4897152121526647, "total_flos": 1421956614876487680, "step": 370300 }, { "loss": 3.405, "learning_rate": 7.076704073066176e-06, "epoch": 1.4901175116968592, "total_flos": 1422345440298393600, "step": 370400 }, { "loss": 3.395, "learning_rate": 7.0758929476702455e-06, "epoch": 1.4905198112410538, "total_flos": 1422742471589560320, "step": 370500 }, { "loss": 3.3875, "learning_rate": 7.075081822274316e-06, "epoch": 1.4909221107852484, "total_flos": 1423133803917803520, "step": 370600 }, { "loss": 3.4225, "learning_rate": 7.074270696878385e-06, "epoch": 1.491324410329443, "total_flos": 1423525980733562880, "step": 370700 }, { "loss": 3.4175, "learning_rate": 7.073459571482454e-06, "epoch": 1.4917267098736378, "total_flos": 1423906116963164160, "step": 370800 }, { "loss": 3.40125, "learning_rate": 7.072648446086523e-06, "epoch": 1.4921290094178323, "total_flos": 1424272613922693120, "step": 370900 }, { "loss": 3.4075, "learning_rate": 7.071837320690593e-06, "epoch": 1.492531308962027, "total_flos": 1424650195444776960, "step": 371000 }, { "loss": 3.4225, "learning_rate": 7.071026195294662e-06, "epoch": 1.4929336085062217, "total_flos": 1425007870430945280, "step": 371100 }, { "loss": 3.42375, "learning_rate": 7.070215069898732e-06, "epoch": 1.4933359080504163, "total_flos": 1425386917855887360, "step": 371200 }, { "loss": 3.4025, "learning_rate": 7.069403944502801e-06, "epoch": 1.4937382075946108, "total_flos": 1425771122497044480, "step": 371300 }, { "loss": 3.4325, "learning_rate": 7.068592819106871e-06, "epoch": 1.4941405071388054, "total_flos": 1426145538518753280, "step": 371400 }, { "loss": 3.40875, "learning_rate": 7.06778169371094e-06, "epoch": 1.494542806683, "total_flos": 1426520597200773120, "step": 371500 }, { "loss": 3.32, "learning_rate": 7.066970568315009e-06, "epoch": 1.4949451062271946, "total_flos": 1426906671399198720, "step": 371600 }, { "loss": 3.40375, "learning_rate": 7.0661594429190784e-06, "epoch": 1.4953474057713891, "total_flos": 1427291773640294400, "step": 371700 }, { "loss": 3.4075, "learning_rate": 7.0653483175231485e-06, "epoch": 1.495749705315584, "total_flos": 1427680641552138240, "step": 371800 }, { "loss": 3.3775, "learning_rate": 7.064537192127218e-06, "epoch": 1.4961520048597785, "total_flos": 1428073636299202560, "step": 371900 }, { "loss": 3.4, "learning_rate": 7.063726066731287e-06, "epoch": 1.496554304403973, "total_flos": 1428474013672980480, "step": 372000 }, { "loss": 3.34375, "learning_rate": 7.062914941335356e-06, "epoch": 1.4969566039481677, "total_flos": 1428859524879728640, "step": 372100 }, { "loss": 3.40125, "learning_rate": 7.062103815939426e-06, "epoch": 1.4973589034923624, "total_flos": 1429263954731335680, "step": 372200 }, { "loss": 3.42125, "learning_rate": 7.061292690543495e-06, "epoch": 1.497761203036557, "total_flos": 1429627259634278400, "step": 372300 }, { "loss": 3.43875, "learning_rate": 7.0604815651475645e-06, "epoch": 1.4981635025807516, "total_flos": 1430016669292830720, "step": 372400 }, { "loss": 3.445, "learning_rate": 7.059670439751634e-06, "epoch": 1.4985658021249462, "total_flos": 1430386337063976960, "step": 372500 }, { "loss": 3.3975, "learning_rate": 7.058859314355704e-06, "epoch": 1.4989681016691407, "total_flos": 1430785508785766400, "step": 372600 }, { "loss": 3.41, "learning_rate": 7.058048188959773e-06, "epoch": 1.4993704012133353, "total_flos": 1431184738931220480, "step": 372700 }, { "loss": 3.415, "learning_rate": 7.057237063563842e-06, "epoch": 1.49977270075753, "total_flos": 1431574260125859840, "step": 372800 }, { "loss": 3.37875, "learning_rate": 7.056425938167911e-06, "epoch": 1.5001750003017247, "total_flos": 1431951661065707520, "step": 372900 }, { "loss": 3.36625, "learning_rate": 7.055614812771981e-06, "epoch": 1.5005772998459193, "total_flos": 1432342063926558720, "step": 373000 }, { "loss": 3.3925, "learning_rate": 7.0548036873760506e-06, "epoch": 1.500979599390114, "total_flos": 1432731022129520640, "step": 373100 }, { "loss": 3.405, "learning_rate": 7.05399256198012e-06, "epoch": 1.5013818989343086, "total_flos": 1433123241435217920, "step": 373200 }, { "loss": 3.34875, "learning_rate": 7.053181436584189e-06, "epoch": 1.5017841984785032, "total_flos": 1433512358975447040, "step": 373300 }, { "loss": 3.39875, "learning_rate": 7.052370311188259e-06, "epoch": 1.5021864980226978, "total_flos": 1433883535139389440, "step": 373400 }, { "loss": 3.38125, "learning_rate": 7.051559185792328e-06, "epoch": 1.5025887975668923, "total_flos": 1434263655435264000, "step": 373500 }, { "loss": 3.41625, "learning_rate": 7.050748060396397e-06, "epoch": 1.502991097111087, "total_flos": 1434658232932515840, "step": 373600 }, { "loss": 3.42875, "learning_rate": 7.049936935000467e-06, "epoch": 1.5033933966552815, "total_flos": 1435044423978270720, "step": 373700 }, { "loss": 3.40375, "learning_rate": 7.049125809604537e-06, "epoch": 1.503795696199476, "total_flos": 1435427146782842880, "step": 373800 }, { "loss": 3.40125, "learning_rate": 7.048314684208606e-06, "epoch": 1.5041979957436709, "total_flos": 1435804441497845760, "step": 373900 }, { "loss": 3.35, "learning_rate": 7.047503558812675e-06, "epoch": 1.5046002952878654, "total_flos": 1436187891942604800, "step": 374000 }, { "loss": 3.3825, "learning_rate": 7.046692433416744e-06, "epoch": 1.50500259483206, "total_flos": 1436564379348787200, "step": 374100 }, { "loss": 3.45375, "learning_rate": 7.045881308020814e-06, "epoch": 1.5054048943762548, "total_flos": 1436934387039436800, "step": 374200 }, { "loss": 3.38, "learning_rate": 7.0450701826248835e-06, "epoch": 1.5058071939204494, "total_flos": 1437313992144814080, "step": 374300 }, { "loss": 3.4275, "learning_rate": 7.044259057228953e-06, "epoch": 1.506209493464644, "total_flos": 1437692997079818240, "step": 374400 }, { "loss": 3.39, "learning_rate": 7.043447931833022e-06, "epoch": 1.5066117930088385, "total_flos": 1438074450497495040, "step": 374500 }, { "loss": 3.37375, "learning_rate": 7.042636806437092e-06, "epoch": 1.507014092553033, "total_flos": 1438467248728596480, "step": 374600 }, { "loss": 3.3725, "learning_rate": 7.041825681041161e-06, "epoch": 1.5074163920972277, "total_flos": 1438849891864535040, "step": 374700 }, { "loss": 3.36875, "learning_rate": 7.04101455564523e-06, "epoch": 1.5078186916414222, "total_flos": 1439218943531581440, "step": 374800 }, { "loss": 3.3525, "learning_rate": 7.0402034302492995e-06, "epoch": 1.508220991185617, "total_flos": 1439607636172431360, "step": 374900 }, { "loss": 3.41875, "learning_rate": 7.0393923048533695e-06, "epoch": 1.5086232907298116, "total_flos": 1439976443522334720, "step": 375000 }, { "loss": 3.395, "learning_rate": 7.038581179457439e-06, "epoch": 1.5090255902740062, "total_flos": 1440358332461875200, "step": 375100 }, { "loss": 3.4225, "learning_rate": 7.037770054061508e-06, "epoch": 1.509427889818201, "total_flos": 1440746254972600320, "step": 375200 }, { "loss": 3.35375, "learning_rate": 7.036958928665578e-06, "epoch": 1.5098301893623955, "total_flos": 1441117617030021120, "step": 375300 }, { "loss": 3.3925, "learning_rate": 7.036147803269647e-06, "epoch": 1.5102324889065901, "total_flos": 1441488676346634240, "step": 375400 }, { "loss": 3.38, "learning_rate": 7.035336677873716e-06, "epoch": 1.5106347884507847, "total_flos": 1441881309929226240, "step": 375500 }, { "loss": 3.365, "learning_rate": 7.0345255524777855e-06, "epoch": 1.5110370879949793, "total_flos": 1442278128770703360, "step": 375600 }, { "loss": 3.37375, "learning_rate": 7.033714427081856e-06, "epoch": 1.5114393875391738, "total_flos": 1442677996265226240, "step": 375700 }, { "loss": 3.38375, "learning_rate": 7.032903301685925e-06, "epoch": 1.5118416870833684, "total_flos": 1443038618990837760, "step": 375800 }, { "loss": 3.42, "learning_rate": 7.032092176289994e-06, "epoch": 1.512243986627563, "total_flos": 1443413698917826560, "step": 375900 }, { "loss": 3.40125, "learning_rate": 7.031281050894063e-06, "epoch": 1.5126462861717578, "total_flos": 1443778044824248320, "step": 376000 }, { "loss": 3.3525, "learning_rate": 7.030469925498133e-06, "epoch": 1.5130485857159524, "total_flos": 1444165478700687360, "step": 376100 }, { "loss": 3.35125, "learning_rate": 7.029658800102202e-06, "epoch": 1.5134508852601471, "total_flos": 1444558308799242240, "step": 376200 }, { "loss": 3.37375, "learning_rate": 7.028847674706272e-06, "epoch": 1.5138531848043417, "total_flos": 1444922755619266560, "step": 376300 }, { "loss": 3.36125, "learning_rate": 7.028036549310341e-06, "epoch": 1.5142554843485363, "total_flos": 1445300969179176960, "step": 376400 }, { "loss": 3.355, "learning_rate": 7.027225423914411e-06, "epoch": 1.5146577838927309, "total_flos": 1445671173385789440, "step": 376500 }, { "loss": 3.40875, "learning_rate": 7.02641429851848e-06, "epoch": 1.5150600834369254, "total_flos": 1446046911906816000, "step": 376600 }, { "loss": 3.3925, "learning_rate": 7.025603173122549e-06, "epoch": 1.51546238298112, "total_flos": 1446424758991011840, "step": 376700 }, { "loss": 3.38375, "learning_rate": 7.0247920477266184e-06, "epoch": 1.5158646825253146, "total_flos": 1446815124673167360, "step": 376800 }, { "loss": 3.3525, "learning_rate": 7.0239809223306885e-06, "epoch": 1.5162669820695092, "total_flos": 1447200418118983680, "step": 376900 }, { "loss": 3.35125, "learning_rate": 7.023169796934758e-06, "epoch": 1.516669281613704, "total_flos": 1447591601732444160, "step": 377000 }, { "loss": 3.3975, "learning_rate": 7.022358671538827e-06, "epoch": 1.5170715811578985, "total_flos": 1447965491941171200, "step": 377100 }, { "loss": 3.3675, "learning_rate": 7.021547546142896e-06, "epoch": 1.5174738807020933, "total_flos": 1448360446536622080, "step": 377200 }, { "loss": 3.38, "learning_rate": 7.020736420746966e-06, "epoch": 1.517876180246288, "total_flos": 1448757068862136320, "step": 377300 }, { "loss": 3.38, "learning_rate": 7.019925295351035e-06, "epoch": 1.5182784797904825, "total_flos": 1449160245260574720, "step": 377400 }, { "loss": 3.32625, "learning_rate": 7.0191141699551045e-06, "epoch": 1.518680779334677, "total_flos": 1449555640689131520, "step": 377500 }, { "loss": 3.4075, "learning_rate": 7.018303044559174e-06, "epoch": 1.5190830788788716, "total_flos": 1449933386859724800, "step": 377600 }, { "loss": 3.42625, "learning_rate": 7.017491919163244e-06, "epoch": 1.5194853784230662, "total_flos": 1450299219913973760, "step": 377700 }, { "loss": 3.36, "learning_rate": 7.016680793767313e-06, "epoch": 1.5198876779672608, "total_flos": 1450693229108305920, "step": 377800 }, { "loss": 3.39875, "learning_rate": 7.015869668371382e-06, "epoch": 1.5202899775114553, "total_flos": 1451089681474068480, "step": 377900 }, { "loss": 3.4175, "learning_rate": 7.015058542975451e-06, "epoch": 1.5206922770556501, "total_flos": 1451460682367016960, "step": 378000 }, { "loss": 3.32375, "learning_rate": 7.014247417579521e-06, "epoch": 1.5210945765998447, "total_flos": 1451836564291584000, "step": 378100 }, { "loss": 3.355, "learning_rate": 7.0134362921835906e-06, "epoch": 1.5214968761440393, "total_flos": 1452236633613312000, "step": 378200 }, { "loss": 3.39875, "learning_rate": 7.01262516678766e-06, "epoch": 1.521899175688234, "total_flos": 1452634562504417280, "step": 378300 }, { "loss": 3.41375, "learning_rate": 7.011814041391729e-06, "epoch": 1.5223014752324286, "total_flos": 1453033665180057600, "step": 378400 }, { "loss": 3.38875, "learning_rate": 7.011002915995799e-06, "epoch": 1.5227037747766232, "total_flos": 1453402164477911040, "step": 378500 }, { "loss": 3.365, "learning_rate": 7.010191790599868e-06, "epoch": 1.5231060743208178, "total_flos": 1453783113327575040, "step": 378600 }, { "loss": 3.365, "learning_rate": 7.009380665203937e-06, "epoch": 1.5235083738650124, "total_flos": 1454166600951029760, "step": 378700 }, { "loss": 3.38, "learning_rate": 7.008569539808007e-06, "epoch": 1.523910673409207, "total_flos": 1454546986809016320, "step": 378800 }, { "loss": 3.3775, "learning_rate": 7.007758414412077e-06, "epoch": 1.5243129729534015, "total_flos": 1454938712169185280, "step": 378900 }, { "loss": 3.3725, "learning_rate": 7.006947289016146e-06, "epoch": 1.5247152724975963, "total_flos": 1455314822477168640, "step": 379000 }, { "loss": 3.405, "learning_rate": 7.006136163620215e-06, "epoch": 1.5251175720417909, "total_flos": 1455701974857768960, "step": 379100 }, { "loss": 3.39875, "learning_rate": 7.005325038224284e-06, "epoch": 1.5255198715859855, "total_flos": 1456074707215687680, "step": 379200 }, { "loss": 3.39625, "learning_rate": 7.004513912828354e-06, "epoch": 1.5259221711301802, "total_flos": 1456446356080189440, "step": 379300 }, { "loss": 3.3575, "learning_rate": 7.0037027874324235e-06, "epoch": 1.5263244706743748, "total_flos": 1456806442370334720, "step": 379400 }, { "loss": 3.40875, "learning_rate": 7.002891662036493e-06, "epoch": 1.5267267702185694, "total_flos": 1457177714136637440, "step": 379500 }, { "loss": 3.42, "learning_rate": 7.002080536640562e-06, "epoch": 1.527129069762764, "total_flos": 1457556575668101120, "step": 379600 }, { "loss": 3.385, "learning_rate": 7.001269411244632e-06, "epoch": 1.5275313693069585, "total_flos": 1457957537278525440, "step": 379700 }, { "loss": 3.375, "learning_rate": 7.000458285848701e-06, "epoch": 1.5279336688511531, "total_flos": 1458330253702717440, "step": 379800 }, { "loss": 3.37375, "learning_rate": 6.99964716045277e-06, "epoch": 1.5283359683953477, "total_flos": 1458699565620633600, "step": 379900 }, { "loss": 3.3625, "learning_rate": 6.99883603505684e-06, "epoch": 1.5287382679395423, "total_flos": 1459073785126379520, "step": 380000 }, { "loss": 3.33, "learning_rate": 6.9980249096609095e-06, "epoch": 1.529140567483737, "total_flos": 1459468718476861440, "step": 380100 }, { "loss": 3.38875, "learning_rate": 6.997213784264979e-06, "epoch": 1.5295428670279316, "total_flos": 1459841164027699200, "step": 380200 }, { "loss": 3.4025, "learning_rate": 6.996402658869048e-06, "epoch": 1.5299451665721264, "total_flos": 1460211240764497920, "step": 380300 }, { "loss": 3.3575, "learning_rate": 6.995591533473118e-06, "epoch": 1.530347466116321, "total_flos": 1460584392710553600, "step": 380400 }, { "loss": 3.35, "learning_rate": 6.994780408077187e-06, "epoch": 1.5307497656605156, "total_flos": 1460964210265620480, "step": 380500 }, { "loss": 3.39875, "learning_rate": 6.993969282681256e-06, "epoch": 1.5311520652047101, "total_flos": 1461347777557708800, "step": 380600 }, { "loss": 3.35625, "learning_rate": 6.9931581572853255e-06, "epoch": 1.5315543647489047, "total_flos": 1461730882771722240, "step": 380700 }, { "loss": 3.37875, "learning_rate": 6.992347031889396e-06, "epoch": 1.5319566642930993, "total_flos": 1462117764278968320, "step": 380800 }, { "loss": 3.3875, "learning_rate": 6.991535906493465e-06, "epoch": 1.5323589638372939, "total_flos": 1462506584389632000, "step": 380900 }, { "loss": 3.31625, "learning_rate": 6.990724781097534e-06, "epoch": 1.5327612633814884, "total_flos": 1462873453136117760, "step": 381000 }, { "loss": 3.37, "learning_rate": 6.989913655701603e-06, "epoch": 1.5331635629256832, "total_flos": 1463270282600079360, "step": 381100 }, { "loss": 3.38375, "learning_rate": 6.989102530305674e-06, "epoch": 1.5335658624698778, "total_flos": 1463649080396636160, "step": 381200 }, { "loss": 3.3275, "learning_rate": 6.988291404909742e-06, "epoch": 1.5339681620140726, "total_flos": 1464031835068661760, "step": 381300 }, { "loss": 3.3875, "learning_rate": 6.987480279513812e-06, "epoch": 1.5343704615582672, "total_flos": 1464401651554590720, "step": 381400 }, { "loss": 3.35875, "learning_rate": 6.986669154117881e-06, "epoch": 1.5347727611024617, "total_flos": 1464788681776619520, "step": 381500 }, { "loss": 3.3575, "learning_rate": 6.985858028721952e-06, "epoch": 1.5351750606466563, "total_flos": 1465171547984732160, "step": 381600 }, { "loss": 3.3775, "learning_rate": 6.98504690332602e-06, "epoch": 1.535577360190851, "total_flos": 1465537110165626880, "step": 381700 }, { "loss": 3.38, "learning_rate": 6.984235777930089e-06, "epoch": 1.5359796597350455, "total_flos": 1465910522362552320, "step": 381800 }, { "loss": 3.4025, "learning_rate": 6.9834246525341584e-06, "epoch": 1.53638195927924, "total_flos": 1466301536016261120, "step": 381900 }, { "loss": 3.35, "learning_rate": 6.982613527138229e-06, "epoch": 1.5367842588234346, "total_flos": 1466679388411699200, "step": 382000 }, { "loss": 3.39, "learning_rate": 6.981802401742298e-06, "epoch": 1.5371865583676294, "total_flos": 1467057421389373440, "step": 382100 }, { "loss": 3.3775, "learning_rate": 6.980991276346367e-06, "epoch": 1.537588857911824, "total_flos": 1467452699970600960, "step": 382200 }, { "loss": 3.37875, "learning_rate": 6.980180150950436e-06, "epoch": 1.5379911574560186, "total_flos": 1467832984914984960, "step": 382300 }, { "loss": 3.385, "learning_rate": 6.979369025554507e-06, "epoch": 1.5383934570002133, "total_flos": 1468223876410122240, "step": 382400 }, { "loss": 3.355, "learning_rate": 6.978557900158575e-06, "epoch": 1.538795756544408, "total_flos": 1468607810177925120, "step": 382500 }, { "loss": 3.38375, "learning_rate": 6.9777467747626445e-06, "epoch": 1.5391980560886025, "total_flos": 1469007024389652480, "step": 382600 }, { "loss": 3.37875, "learning_rate": 6.976935649366714e-06, "epoch": 1.539600355632797, "total_flos": 1469380319739248640, "step": 382700 }, { "loss": 3.3225, "learning_rate": 6.976124523970785e-06, "epoch": 1.5400026551769916, "total_flos": 1469781068899983360, "step": 382800 }, { "loss": 3.3925, "learning_rate": 6.975313398574853e-06, "epoch": 1.5404049547211862, "total_flos": 1470174791287234560, "step": 382900 }, { "loss": 3.38375, "learning_rate": 6.974502273178922e-06, "epoch": 1.5408072542653808, "total_flos": 1470558103639695360, "step": 383000 }, { "loss": 3.3925, "learning_rate": 6.973691147782991e-06, "epoch": 1.5412095538095756, "total_flos": 1470937613142712320, "step": 383100 }, { "loss": 3.41375, "learning_rate": 6.972880022387062e-06, "epoch": 1.5416118533537702, "total_flos": 1471341846478356480, "step": 383200 }, { "loss": 3.32125, "learning_rate": 6.9720688969911306e-06, "epoch": 1.5420141528979647, "total_flos": 1471730825926287360, "step": 383300 }, { "loss": 3.3875, "learning_rate": 6.9712577715952e-06, "epoch": 1.5424164524421595, "total_flos": 1472096611179356160, "step": 383400 }, { "loss": 3.355, "learning_rate": 6.970446646199269e-06, "epoch": 1.542818751986354, "total_flos": 1472495464226611200, "step": 383500 }, { "loss": 3.3925, "learning_rate": 6.96963552080334e-06, "epoch": 1.5432210515305487, "total_flos": 1472881310041620480, "step": 383600 }, { "loss": 3.3375, "learning_rate": 6.968824395407409e-06, "epoch": 1.5436233510747432, "total_flos": 1473264563970416640, "step": 383700 }, { "loss": 3.38, "learning_rate": 6.968013270011477e-06, "epoch": 1.5440256506189378, "total_flos": 1473653920516546560, "step": 383800 }, { "loss": 3.3075, "learning_rate": 6.967202144615547e-06, "epoch": 1.5444279501631324, "total_flos": 1474021469102039040, "step": 383900 }, { "loss": 3.3825, "learning_rate": 6.9663910192196175e-06, "epoch": 1.544830249707327, "total_flos": 1474404531826114560, "step": 384000 }, { "loss": 3.31, "learning_rate": 6.965579893823687e-06, "epoch": 1.5452325492515215, "total_flos": 1474797951472558080, "step": 384100 }, { "loss": 3.365, "learning_rate": 6.964768768427755e-06, "epoch": 1.5456348487957163, "total_flos": 1475180844236881920, "step": 384200 }, { "loss": 3.365, "learning_rate": 6.963957643031824e-06, "epoch": 1.546037148339911, "total_flos": 1475564337171578880, "step": 384300 }, { "loss": 3.3675, "learning_rate": 6.963146517635895e-06, "epoch": 1.5464394478841057, "total_flos": 1475948270939381760, "step": 384400 }, { "loss": 3.36375, "learning_rate": 6.962335392239964e-06, "epoch": 1.5468417474283003, "total_flos": 1476324285645004800, "step": 384500 }, { "loss": 3.33875, "learning_rate": 6.961524266844033e-06, "epoch": 1.5472440469724948, "total_flos": 1476698531706961920, "step": 384600 }, { "loss": 3.3825, "learning_rate": 6.9607131414481035e-06, "epoch": 1.5476463465166894, "total_flos": 1477069713182146560, "step": 384700 }, { "loss": 3.34375, "learning_rate": 6.959902016052173e-06, "epoch": 1.548048646060884, "total_flos": 1477447677113671680, "step": 384800 }, { "loss": 3.4075, "learning_rate": 6.959090890656242e-06, "epoch": 1.5484509456050786, "total_flos": 1477836640627875840, "step": 384900 }, { "loss": 3.34625, "learning_rate": 6.95827976526031e-06, "epoch": 1.5488532451492731, "total_flos": 1478223777074749440, "step": 385000 }, { "loss": 3.3875, "learning_rate": 6.957468639864381e-06, "epoch": 1.5492555446934677, "total_flos": 1478620075414487040, "step": 385100 }, { "loss": 3.345, "learning_rate": 6.95665751446845e-06, "epoch": 1.5496578442376625, "total_flos": 1478996674356756480, "step": 385200 }, { "loss": 3.31625, "learning_rate": 6.9558463890725196e-06, "epoch": 1.550060143781857, "total_flos": 1479370994776104960, "step": 385300 }, { "loss": 3.3725, "learning_rate": 6.955035263676588e-06, "epoch": 1.5504624433260519, "total_flos": 1479748369159741440, "step": 385400 }, { "loss": 3.38125, "learning_rate": 6.954224138280659e-06, "epoch": 1.5508647428702464, "total_flos": 1480125451425054720, "step": 385500 }, { "loss": 3.37, "learning_rate": 6.953413012884728e-06, "epoch": 1.551267042414441, "total_flos": 1480503452535275520, "step": 385600 }, { "loss": 3.39, "learning_rate": 6.952601887488797e-06, "epoch": 1.5516693419586356, "total_flos": 1480876524812697600, "step": 385700 }, { "loss": 3.32875, "learning_rate": 6.9517907620928655e-06, "epoch": 1.5520716415028302, "total_flos": 1481268112080568320, "step": 385800 }, { "loss": 3.38875, "learning_rate": 6.9509796366969364e-06, "epoch": 1.5524739410470247, "total_flos": 1481670842334658560, "step": 385900 }, { "loss": 3.28125, "learning_rate": 6.950168511301006e-06, "epoch": 1.5528762405912193, "total_flos": 1482057028069171200, "step": 386000 }, { "loss": 3.39125, "learning_rate": 6.949357385905075e-06, "epoch": 1.5532785401354139, "total_flos": 1482437058073927680, "step": 386100 }, { "loss": 3.38875, "learning_rate": 6.948546260509144e-06, "epoch": 1.5536808396796087, "total_flos": 1482810693343027200, "step": 386200 }, { "loss": 3.43375, "learning_rate": 6.947735135113214e-06, "epoch": 1.5540831392238033, "total_flos": 1483181157800509440, "step": 386300 }, { "loss": 3.3475, "learning_rate": 6.946924009717283e-06, "epoch": 1.5544854387679978, "total_flos": 1483580924381429760, "step": 386400 }, { "loss": 3.3525, "learning_rate": 6.9461128843213525e-06, "epoch": 1.5548877383121926, "total_flos": 1483976022380421120, "step": 386500 }, { "loss": 3.3525, "learning_rate": 6.945301758925422e-06, "epoch": 1.5552900378563872, "total_flos": 1484366919186800640, "step": 386600 }, { "loss": 3.35875, "learning_rate": 6.944490633529492e-06, "epoch": 1.5556923374005818, "total_flos": 1484755988925849600, "step": 386700 }, { "loss": 3.39375, "learning_rate": 6.943679508133561e-06, "epoch": 1.5560946369447763, "total_flos": 1485141006187069440, "step": 386800 }, { "loss": 3.33, "learning_rate": 6.94286838273763e-06, "epoch": 1.556496936488971, "total_flos": 1485513324268093440, "step": 386900 }, { "loss": 3.405, "learning_rate": 6.942057257341699e-06, "epoch": 1.5568992360331655, "total_flos": 1485913791932989440, "step": 387000 }, { "loss": 3.34375, "learning_rate": 6.941246131945769e-06, "epoch": 1.55730153557736, "total_flos": 1486288478828052480, "step": 387100 }, { "loss": 3.355, "learning_rate": 6.9404350065498385e-06, "epoch": 1.5577038351215549, "total_flos": 1486649855750062080, "step": 387200 }, { "loss": 3.3675, "learning_rate": 6.939623881153908e-06, "epoch": 1.5581061346657494, "total_flos": 1487034464045629440, "step": 387300 }, { "loss": 3.33, "learning_rate": 6.938812755757977e-06, "epoch": 1.558508434209944, "total_flos": 1487398639992299520, "step": 387400 }, { "loss": 3.38375, "learning_rate": 6.938001630362047e-06, "epoch": 1.5589107337541388, "total_flos": 1487775456695500800, "step": 387500 }, { "loss": 3.365, "learning_rate": 6.937190504966116e-06, "epoch": 1.5593130332983334, "total_flos": 1488175095806607360, "step": 387600 }, { "loss": 3.3475, "learning_rate": 6.936379379570185e-06, "epoch": 1.559715332842528, "total_flos": 1488561865777766400, "step": 387700 }, { "loss": 3.36125, "learning_rate": 6.9355682541742545e-06, "epoch": 1.5601176323867225, "total_flos": 1488939542902210560, "step": 387800 }, { "loss": 3.375, "learning_rate": 6.934757128778325e-06, "epoch": 1.560519931930917, "total_flos": 1489327460101693440, "step": 387900 }, { "loss": 3.38, "learning_rate": 6.933946003382394e-06, "epoch": 1.5609222314751117, "total_flos": 1489720486716211200, "step": 388000 }, { "loss": 3.32125, "learning_rate": 6.933134877986463e-06, "epoch": 1.5613245310193062, "total_flos": 1490111712819609600, "step": 388100 }, { "loss": 3.375, "learning_rate": 6.932323752590532e-06, "epoch": 1.5617268305635008, "total_flos": 1490488529522810880, "step": 388200 }, { "loss": 3.38625, "learning_rate": 6.931512627194602e-06, "epoch": 1.5621291301076956, "total_flos": 1490883659389255680, "step": 388300 }, { "loss": 3.3025, "learning_rate": 6.930701501798671e-06, "epoch": 1.5625314296518902, "total_flos": 1491259243884257280, "step": 388400 }, { "loss": 3.32125, "learning_rate": 6.929890376402741e-06, "epoch": 1.562933729196085, "total_flos": 1491649142177095680, "step": 388500 }, { "loss": 3.38375, "learning_rate": 6.92907925100681e-06, "epoch": 1.5633360287402795, "total_flos": 1492023372305326080, "step": 388600 }, { "loss": 3.3075, "learning_rate": 6.92826812561088e-06, "epoch": 1.5637383282844741, "total_flos": 1492424397650657280, "step": 388700 }, { "loss": 3.35625, "learning_rate": 6.927457000214949e-06, "epoch": 1.5641406278286687, "total_flos": 1492802096020070400, "step": 388800 }, { "loss": 3.32375, "learning_rate": 6.926645874819018e-06, "epoch": 1.5645429273728633, "total_flos": 1493177117523394560, "step": 388900 }, { "loss": 3.38375, "learning_rate": 6.9258347494230874e-06, "epoch": 1.5649452269170578, "total_flos": 1493560424564613120, "step": 389000 }, { "loss": 3.355, "learning_rate": 6.9250236240271575e-06, "epoch": 1.5653475264612524, "total_flos": 1493941941717196800, "step": 389100 }, { "loss": 3.38875, "learning_rate": 6.924212498631227e-06, "epoch": 1.565749826005447, "total_flos": 1494302984030945280, "step": 389200 }, { "loss": 3.345, "learning_rate": 6.923401373235296e-06, "epoch": 1.5661521255496418, "total_flos": 1494678053335449600, "step": 389300 }, { "loss": 3.4575, "learning_rate": 6.922590247839366e-06, "epoch": 1.5665544250938364, "total_flos": 1495074941223075840, "step": 389400 }, { "loss": 3.32375, "learning_rate": 6.921779122443435e-06, "epoch": 1.5669567246380312, "total_flos": 1495476481758904320, "step": 389500 }, { "loss": 3.345, "learning_rate": 6.920967997047504e-06, "epoch": 1.5673590241822257, "total_flos": 1495849766486016000, "step": 389600 }, { "loss": 3.4075, "learning_rate": 6.9201568716515735e-06, "epoch": 1.5677613237264203, "total_flos": 1496234449138974720, "step": 389700 }, { "loss": 3.3225, "learning_rate": 6.9193457462556435e-06, "epoch": 1.5681636232706149, "total_flos": 1496635097386106880, "step": 389800 }, { "loss": 3.33375, "learning_rate": 6.918534620859713e-06, "epoch": 1.5685659228148094, "total_flos": 1497015084900925440, "step": 389900 }, { "loss": 3.36875, "learning_rate": 6.917723495463782e-06, "epoch": 1.568968222359004, "total_flos": 1497407973423144960, "step": 390000 }, { "loss": 3.36, "learning_rate": 6.916912370067851e-06, "epoch": 1.5693705219031986, "total_flos": 1497786914623242240, "step": 390100 }, { "loss": 3.36, "learning_rate": 6.916101244671921e-06, "epoch": 1.5697728214473932, "total_flos": 1498179107372728320, "step": 390200 }, { "loss": 3.33375, "learning_rate": 6.91529011927599e-06, "epoch": 1.570175120991588, "total_flos": 1498569993556623360, "step": 390300 }, { "loss": 3.36375, "learning_rate": 6.9144789938800596e-06, "epoch": 1.5705774205357825, "total_flos": 1498953497113804800, "step": 390400 }, { "loss": 3.335, "learning_rate": 6.913667868484129e-06, "epoch": 1.570979720079977, "total_flos": 1499332873835765760, "step": 390500 }, { "loss": 3.33625, "learning_rate": 6.912856743088199e-06, "epoch": 1.571382019624172, "total_flos": 1499714900867604480, "step": 390600 }, { "loss": 3.3775, "learning_rate": 6.912045617692268e-06, "epoch": 1.5717843191683665, "total_flos": 1500103582885969920, "step": 390700 }, { "loss": 3.3375, "learning_rate": 6.911234492296337e-06, "epoch": 1.572186618712561, "total_flos": 1500483862519111680, "step": 390800 }, { "loss": 3.37, "learning_rate": 6.910423366900406e-06, "epoch": 1.5725889182567556, "total_flos": 1500864683898961920, "step": 390900 }, { "loss": 3.38375, "learning_rate": 6.9096122415044764e-06, "epoch": 1.5729912178009502, "total_flos": 1501249600246579200, "step": 391000 }, { "loss": 3.3825, "learning_rate": 6.908801116108546e-06, "epoch": 1.5733935173451448, "total_flos": 1501630575652454400, "step": 391100 }, { "loss": 3.375, "learning_rate": 6.907989990712615e-06, "epoch": 1.5737958168893393, "total_flos": 1502006489444474880, "step": 391200 }, { "loss": 3.39625, "learning_rate": 6.907178865316684e-06, "epoch": 1.5741981164335341, "total_flos": 1502395070549237760, "step": 391300 }, { "loss": 3.3475, "learning_rate": 6.906367739920754e-06, "epoch": 1.5746004159777287, "total_flos": 1502797051918172160, "step": 391400 }, { "loss": 3.3375, "learning_rate": 6.905556614524823e-06, "epoch": 1.5750027155219233, "total_flos": 1503175408881623040, "step": 391500 }, { "loss": 3.39, "learning_rate": 6.9047454891288925e-06, "epoch": 1.575405015066118, "total_flos": 1503557138483896320, "step": 391600 }, { "loss": 3.28125, "learning_rate": 6.903934363732962e-06, "epoch": 1.5758073146103126, "total_flos": 1503939648838778880, "step": 391700 }, { "loss": 3.355, "learning_rate": 6.903123238337032e-06, "epoch": 1.5762096141545072, "total_flos": 1504313608093655040, "step": 391800 }, { "loss": 3.34, "learning_rate": 6.902312112941101e-06, "epoch": 1.5766119136987018, "total_flos": 1504681804650700800, "step": 391900 }, { "loss": 3.3375, "learning_rate": 6.90150098754517e-06, "epoch": 1.5770142132428964, "total_flos": 1505052439067934720, "step": 392000 }, { "loss": 3.31125, "learning_rate": 6.900689862149239e-06, "epoch": 1.577416512787091, "total_flos": 1505426398322810880, "step": 392100 }, { "loss": 3.34625, "learning_rate": 6.899878736753309e-06, "epoch": 1.5778188123312855, "total_flos": 1505811548365086720, "step": 392200 }, { "loss": 3.40875, "learning_rate": 6.8990676113573785e-06, "epoch": 1.57822111187548, "total_flos": 1506195280305684480, "step": 392300 }, { "loss": 3.36, "learning_rate": 6.898256485961448e-06, "epoch": 1.5786234114196749, "total_flos": 1506569675082424320, "step": 392400 }, { "loss": 3.305, "learning_rate": 6.897445360565517e-06, "epoch": 1.5790257109638695, "total_flos": 1506956816840540160, "step": 392500 }, { "loss": 3.30375, "learning_rate": 6.896634235169587e-06, "epoch": 1.5794280105080643, "total_flos": 1507356742758727680, "step": 392600 }, { "loss": 3.33375, "learning_rate": 6.895823109773656e-06, "epoch": 1.5798303100522588, "total_flos": 1507742562017525760, "step": 392700 }, { "loss": 3.31375, "learning_rate": 6.895011984377725e-06, "epoch": 1.5802326095964534, "total_flos": 1508141824030433280, "step": 392800 }, { "loss": 3.40125, "learning_rate": 6.8942008589817945e-06, "epoch": 1.580634909140648, "total_flos": 1508527999142461440, "step": 392900 }, { "loss": 3.33625, "learning_rate": 6.893389733585865e-06, "epoch": 1.5810372086848425, "total_flos": 1508919012796170240, "step": 393000 }, { "loss": 3.41875, "learning_rate": 6.892578608189934e-06, "epoch": 1.5814395082290371, "total_flos": 1509308167515095040, "step": 393100 }, { "loss": 3.30625, "learning_rate": 6.891767482794003e-06, "epoch": 1.5818418077732317, "total_flos": 1509697040738181120, "step": 393200 }, { "loss": 3.35625, "learning_rate": 6.890956357398072e-06, "epoch": 1.5822441073174263, "total_flos": 1510072449962188800, "step": 393300 }, { "loss": 3.3125, "learning_rate": 6.890145232002142e-06, "epoch": 1.582646406861621, "total_flos": 1510446759759052800, "step": 393400 }, { "loss": 3.325, "learning_rate": 6.889334106606211e-06, "epoch": 1.5830487064058156, "total_flos": 1510842792536678400, "step": 393500 }, { "loss": 3.33875, "learning_rate": 6.888522981210281e-06, "epoch": 1.5834510059500102, "total_flos": 1511237247875358720, "step": 393600 }, { "loss": 3.3475, "learning_rate": 6.88771185581435e-06, "epoch": 1.583853305494205, "total_flos": 1511620597406515200, "step": 393700 }, { "loss": 3.2975, "learning_rate": 6.88690073041842e-06, "epoch": 1.5842556050383996, "total_flos": 1511999039349841920, "step": 393800 }, { "loss": 3.31, "learning_rate": 6.886089605022489e-06, "epoch": 1.5846579045825941, "total_flos": 1512387811659325440, "step": 393900 }, { "loss": 3.36125, "learning_rate": 6.885278479626558e-06, "epoch": 1.5850602041267887, "total_flos": 1512766519164764160, "step": 394000 }, { "loss": 3.375, "learning_rate": 6.884467354230628e-06, "epoch": 1.5854625036709833, "total_flos": 1513150086456852480, "step": 394100 }, { "loss": 3.3325, "learning_rate": 6.8836562288346975e-06, "epoch": 1.5858648032151779, "total_flos": 1513542815641804800, "step": 394200 }, { "loss": 3.335, "learning_rate": 6.882845103438767e-06, "epoch": 1.5862671027593724, "total_flos": 1513936357446819840, "step": 394300 }, { "loss": 3.30625, "learning_rate": 6.882033978042836e-06, "epoch": 1.5866694023035672, "total_flos": 1514324949174067200, "step": 394400 }, { "loss": 3.32, "learning_rate": 6.881222852646906e-06, "epoch": 1.5870717018477618, "total_flos": 1514710609095598080, "step": 394500 }, { "loss": 3.34625, "learning_rate": 6.880411727250975e-06, "epoch": 1.5874740013919564, "total_flos": 1515107204864901120, "step": 394600 }, { "loss": 3.38625, "learning_rate": 6.879600601855044e-06, "epoch": 1.5878763009361512, "total_flos": 1515498781510287360, "step": 394700 }, { "loss": 3.4425, "learning_rate": 6.8787894764591135e-06, "epoch": 1.5882786004803457, "total_flos": 1515886555306229760, "step": 394800 }, { "loss": 3.3, "learning_rate": 6.8779783510631835e-06, "epoch": 1.5886809000245403, "total_flos": 1516274387525836800, "step": 394900 }, { "loss": 3.33875, "learning_rate": 6.877167225667253e-06, "epoch": 1.589083199568735, "total_flos": 1516676076776448000, "step": 395000 }, { "loss": 3.33875, "learning_rate": 6.876356100271322e-06, "epoch": 1.5894854991129295, "total_flos": 1517065629838540800, "step": 395100 }, { "loss": 3.37375, "learning_rate": 6.875544974875391e-06, "epoch": 1.589887798657124, "total_flos": 1517451773083115520, "step": 395200 }, { "loss": 3.30125, "learning_rate": 6.874733849479461e-06, "epoch": 1.5902900982013186, "total_flos": 1517834963277004800, "step": 395300 }, { "loss": 3.365, "learning_rate": 6.87392272408353e-06, "epoch": 1.5906923977455134, "total_flos": 1518215141996544000, "step": 395400 }, { "loss": 3.2825, "learning_rate": 6.8731115986875996e-06, "epoch": 1.591094697289708, "total_flos": 1518598757089812480, "step": 395500 }, { "loss": 3.3375, "learning_rate": 6.872300473291669e-06, "epoch": 1.5914969968339026, "total_flos": 1518978112566804480, "step": 395600 }, { "loss": 3.34, "learning_rate": 6.871489347895739e-06, "epoch": 1.5918992963780974, "total_flos": 1519359990883860480, "step": 395700 }, { "loss": 3.3275, "learning_rate": 6.870678222499808e-06, "epoch": 1.592301595922292, "total_flos": 1519759215718072320, "step": 395800 }, { "loss": 3.3625, "learning_rate": 6.869867097103877e-06, "epoch": 1.5927038954664865, "total_flos": 1520135979308851200, "step": 395900 }, { "loss": 3.38, "learning_rate": 6.869055971707946e-06, "epoch": 1.593106195010681, "total_flos": 1520512684475965440, "step": 396000 }, { "loss": 3.355, "learning_rate": 6.8682448463120164e-06, "epoch": 1.5935084945548756, "total_flos": 1520900718522777600, "step": 396100 }, { "loss": 3.29375, "learning_rate": 6.867433720916086e-06, "epoch": 1.5939107940990702, "total_flos": 1521278655898091520, "step": 396200 }, { "loss": 3.3525, "learning_rate": 6.866622595520155e-06, "epoch": 1.5943130936432648, "total_flos": 1521665330266890240, "step": 396300 }, { "loss": 3.34375, "learning_rate": 6.865811470124224e-06, "epoch": 1.5947153931874594, "total_flos": 1522054251291156480, "step": 396400 }, { "loss": 3.365, "learning_rate": 6.865000344728294e-06, "epoch": 1.5951176927316542, "total_flos": 1522426112605347840, "step": 396500 }, { "loss": 3.3425, "learning_rate": 6.864189219332363e-06, "epoch": 1.5955199922758487, "total_flos": 1522810264134082560, "step": 396600 }, { "loss": 3.33375, "learning_rate": 6.8633780939364325e-06, "epoch": 1.5959222918200435, "total_flos": 1523187702252625920, "step": 396700 }, { "loss": 3.33625, "learning_rate": 6.862566968540502e-06, "epoch": 1.596324591364238, "total_flos": 1523577584611737600, "step": 396800 }, { "loss": 3.3175, "learning_rate": 6.861755843144572e-06, "epoch": 1.5967268909084327, "total_flos": 1523958799023513600, "step": 396900 }, { "loss": 3.39875, "learning_rate": 6.860944717748641e-06, "epoch": 1.5971291904526272, "total_flos": 1524343014287155200, "step": 397000 }, { "loss": 3.34125, "learning_rate": 6.86013359235271e-06, "epoch": 1.5975314899968218, "total_flos": 1524727489801666560, "step": 397100 }, { "loss": 3.355, "learning_rate": 6.859322466956779e-06, "epoch": 1.5979337895410164, "total_flos": 1525096907944427520, "step": 397200 }, { "loss": 3.35125, "learning_rate": 6.858511341560849e-06, "epoch": 1.598336089085211, "total_flos": 1525468344359239680, "step": 397300 }, { "loss": 3.385, "learning_rate": 6.8577002161649185e-06, "epoch": 1.5987383886294055, "total_flos": 1525868079072706560, "step": 397400 }, { "loss": 3.365, "learning_rate": 6.856889090768988e-06, "epoch": 1.5991406881736003, "total_flos": 1526258981190328320, "step": 397500 }, { "loss": 3.33125, "learning_rate": 6.856077965373057e-06, "epoch": 1.599542987717795, "total_flos": 1526637486868561920, "step": 397600 }, { "loss": 3.32, "learning_rate": 6.855266839977127e-06, "epoch": 1.5999452872619895, "total_flos": 1527023842562826240, "step": 397700 }, { "loss": 3.34, "learning_rate": 6.854455714581196e-06, "epoch": 1.6003475868061843, "total_flos": 1527407022134231040, "step": 397800 }, { "loss": 3.33125, "learning_rate": 6.853644589185265e-06, "epoch": 1.6007498863503788, "total_flos": 1527786999026565120, "step": 397900 }, { "loss": 3.35375, "learning_rate": 6.8528334637893345e-06, "epoch": 1.6011521858945734, "total_flos": 1528178464135864320, "step": 398000 }, { "loss": 3.2925, "learning_rate": 6.852022338393405e-06, "epoch": 1.601554485438768, "total_flos": 1528584524538839040, "step": 398100 }, { "loss": 3.35125, "learning_rate": 6.851211212997474e-06, "epoch": 1.6019567849829626, "total_flos": 1528974284739379200, "step": 398200 }, { "loss": 3.395, "learning_rate": 6.850400087601543e-06, "epoch": 1.6023590845271571, "total_flos": 1529358117593579520, "step": 398300 }, { "loss": 3.32625, "learning_rate": 6.849588962205612e-06, "epoch": 1.6027613840713517, "total_flos": 1529739496653864960, "step": 398400 }, { "loss": 3.35875, "learning_rate": 6.848777836809682e-06, "epoch": 1.6031636836155465, "total_flos": 1530109668993024000, "step": 398500 }, { "loss": 3.32375, "learning_rate": 6.847966711413751e-06, "epoch": 1.603565983159741, "total_flos": 1530497453411450880, "step": 398600 }, { "loss": 3.34375, "learning_rate": 6.847155586017821e-06, "epoch": 1.6039682827039357, "total_flos": 1530864927639552000, "step": 398700 }, { "loss": 3.325, "learning_rate": 6.846344460621891e-06, "epoch": 1.6043705822481305, "total_flos": 1531254416966737920, "step": 398800 }, { "loss": 3.35875, "learning_rate": 6.84553333522596e-06, "epoch": 1.604772881792325, "total_flos": 1531655665384243200, "step": 398900 }, { "loss": 3.34375, "learning_rate": 6.844722209830029e-06, "epoch": 1.6051751813365196, "total_flos": 1532034080771358720, "step": 399000 }, { "loss": 3.34625, "learning_rate": 6.843911084434098e-06, "epoch": 1.6055774808807142, "total_flos": 1532415157090836480, "step": 399100 }, { "loss": 3.34625, "learning_rate": 6.843099959038168e-06, "epoch": 1.6059797804249087, "total_flos": 1532811370450698240, "step": 399200 }, { "loss": 3.38125, "learning_rate": 6.8422888336422375e-06, "epoch": 1.6063820799691033, "total_flos": 1533209702996213760, "step": 399300 }, { "loss": 3.37, "learning_rate": 6.841477708246307e-06, "epoch": 1.606784379513298, "total_flos": 1533608901274214400, "step": 399400 }, { "loss": 3.32875, "learning_rate": 6.840666582850376e-06, "epoch": 1.6071866790574925, "total_flos": 1533980932548157440, "step": 399500 }, { "loss": 3.335, "learning_rate": 6.839855457454446e-06, "epoch": 1.6075889786016873, "total_flos": 1534375674693918720, "step": 399600 }, { "loss": 3.3125, "learning_rate": 6.839044332058515e-06, "epoch": 1.6079912781458818, "total_flos": 1534748460164259840, "step": 399700 }, { "loss": 3.34625, "learning_rate": 6.838233206662584e-06, "epoch": 1.6083935776900766, "total_flos": 1535135145155543040, "step": 399800 }, { "loss": 3.37875, "learning_rate": 6.8374220812666535e-06, "epoch": 1.6087958772342712, "total_flos": 1535521113129123840, "step": 399900 }, { "loss": 3.32375, "learning_rate": 6.8366109558707235e-06, "epoch": 1.6091981767784658, "total_flos": 1535921007179857920, "step": 400000 }, { "loss": 3.31875, "learning_rate": 6.835799830474793e-06, "epoch": 1.6096004763226603, "total_flos": 1536293851073863680, "step": 400100 }, { "loss": 3.37, "learning_rate": 6.834988705078862e-06, "epoch": 1.610002775866855, "total_flos": 1536668065268367360, "step": 400200 }, { "loss": 3.3325, "learning_rate": 6.834177579682931e-06, "epoch": 1.6104050754110495, "total_flos": 1537060072124375040, "step": 400300 }, { "loss": 3.31125, "learning_rate": 6.833366454287001e-06, "epoch": 1.610807374955244, "total_flos": 1537429633670676480, "step": 400400 }, { "loss": 3.36, "learning_rate": 6.83255532889107e-06, "epoch": 1.6112096744994386, "total_flos": 1537818910548172800, "step": 400500 }, { "loss": 3.32375, "learning_rate": 6.8317442034951396e-06, "epoch": 1.6116119740436334, "total_flos": 1538203789717094400, "step": 400600 }, { "loss": 3.315, "learning_rate": 6.830933078099209e-06, "epoch": 1.612014273587828, "total_flos": 1538573298150973440, "step": 400700 }, { "loss": 3.355, "learning_rate": 6.830121952703279e-06, "epoch": 1.6124165731320228, "total_flos": 1538962229797724160, "step": 400800 }, { "loss": 3.36375, "learning_rate": 6.829310827307348e-06, "epoch": 1.6128188726762174, "total_flos": 1539349063503790080, "step": 400900 }, { "loss": 3.28, "learning_rate": 6.828499701911417e-06, "epoch": 1.613221172220412, "total_flos": 1539736539870167040, "step": 401000 }, { "loss": 3.325, "learning_rate": 6.827688576515486e-06, "epoch": 1.6136234717646065, "total_flos": 1540108857951191040, "step": 401100 }, { "loss": 3.3675, "learning_rate": 6.8268774511195564e-06, "epoch": 1.614025771308801, "total_flos": 1540482493220290560, "step": 401200 }, { "loss": 3.31, "learning_rate": 6.826066325723626e-06, "epoch": 1.6144280708529957, "total_flos": 1540862225795481600, "step": 401300 }, { "loss": 3.31375, "learning_rate": 6.825255200327695e-06, "epoch": 1.6148303703971902, "total_flos": 1541247152765583360, "step": 401400 }, { "loss": 3.34, "learning_rate": 6.824444074931764e-06, "epoch": 1.6152326699413848, "total_flos": 1541643190854451200, "step": 401500 }, { "loss": 3.40625, "learning_rate": 6.823632949535834e-06, "epoch": 1.6156349694855796, "total_flos": 1542018424807464960, "step": 401600 }, { "loss": 3.3025, "learning_rate": 6.822821824139903e-06, "epoch": 1.6160372690297742, "total_flos": 1542392617756999680, "step": 401700 }, { "loss": 3.35, "learning_rate": 6.8220106987439725e-06, "epoch": 1.6164395685739688, "total_flos": 1542777645640704000, "step": 401800 }, { "loss": 3.30125, "learning_rate": 6.821199573348042e-06, "epoch": 1.6168418681181635, "total_flos": 1543171968198328320, "step": 401900 }, { "loss": 3.37125, "learning_rate": 6.820388447952112e-06, "epoch": 1.6172441676623581, "total_flos": 1543559173691351040, "step": 402000 }, { "loss": 3.3425, "learning_rate": 6.819577322556181e-06, "epoch": 1.6176464672065527, "total_flos": 1543926908170321920, "step": 402100 }, { "loss": 3.28875, "learning_rate": 6.81876619716025e-06, "epoch": 1.6180487667507473, "total_flos": 1544317682818129920, "step": 402200 }, { "loss": 3.33875, "learning_rate": 6.817955071764319e-06, "epoch": 1.6184510662949418, "total_flos": 1544705971804569600, "step": 402300 }, { "loss": 3.28875, "learning_rate": 6.817143946368389e-06, "epoch": 1.6188533658391364, "total_flos": 1545096496823992320, "step": 402400 }, { "loss": 3.335, "learning_rate": 6.8163328209724585e-06, "epoch": 1.619255665383331, "total_flos": 1545486947486023680, "step": 402500 }, { "loss": 3.29125, "learning_rate": 6.815521695576528e-06, "epoch": 1.6196579649275258, "total_flos": 1545853741875118080, "step": 402600 }, { "loss": 3.25875, "learning_rate": 6.814710570180597e-06, "epoch": 1.6200602644717204, "total_flos": 1546233803747328000, "step": 402700 }, { "loss": 3.33, "learning_rate": 6.813899444784667e-06, "epoch": 1.620462564015915, "total_flos": 1546613674414817280, "step": 402800 }, { "loss": 3.295, "learning_rate": 6.813088319388736e-06, "epoch": 1.6208648635601097, "total_flos": 1547004815538339840, "step": 402900 }, { "loss": 3.28875, "learning_rate": 6.812277193992805e-06, "epoch": 1.6212671631043043, "total_flos": 1547392254726021120, "step": 403000 }, { "loss": 3.35, "learning_rate": 6.8114660685968745e-06, "epoch": 1.6216694626484989, "total_flos": 1547775598945935360, "step": 403100 }, { "loss": 3.3475, "learning_rate": 6.810654943200945e-06, "epoch": 1.6220717621926934, "total_flos": 1548155246541250560, "step": 403200 }, { "loss": 3.325, "learning_rate": 6.809843817805014e-06, "epoch": 1.622474061736888, "total_flos": 1548540486874644480, "step": 403300 }, { "loss": 3.365, "learning_rate": 6.809032692409083e-06, "epoch": 1.6228763612810826, "total_flos": 1548929694705991680, "step": 403400 }, { "loss": 3.2625, "learning_rate": 6.808221567013154e-06, "epoch": 1.6232786608252772, "total_flos": 1549315912307957760, "step": 403500 }, { "loss": 3.34375, "learning_rate": 6.807410441617222e-06, "epoch": 1.6236809603694717, "total_flos": 1549695039401533440, "step": 403600 }, { "loss": 3.34625, "learning_rate": 6.806599316221291e-06, "epoch": 1.6240832599136665, "total_flos": 1550086153968844800, "step": 403700 }, { "loss": 3.29, "learning_rate": 6.805788190825361e-06, "epoch": 1.624485559457861, "total_flos": 1550464479064842240, "step": 403800 }, { "loss": 3.36125, "learning_rate": 6.8049770654294315e-06, "epoch": 1.624887859002056, "total_flos": 1550849464458608640, "step": 403900 }, { "loss": 3.33625, "learning_rate": 6.804165940033501e-06, "epoch": 1.6252901585462505, "total_flos": 1551214994772049920, "step": 404000 }, { "loss": 3.3725, "learning_rate": 6.803354814637569e-06, "epoch": 1.625692458090445, "total_flos": 1551595343451340800, "step": 404100 }, { "loss": 3.30125, "learning_rate": 6.802543689241638e-06, "epoch": 1.6260947576346396, "total_flos": 1551980960882933760, "step": 404200 }, { "loss": 3.29375, "learning_rate": 6.801732563845709e-06, "epoch": 1.6264970571788342, "total_flos": 1552373551975587840, "step": 404300 }, { "loss": 3.32375, "learning_rate": 6.800921438449778e-06, "epoch": 1.6268993567230288, "total_flos": 1552761671002275840, "step": 404400 }, { "loss": 3.28375, "learning_rate": 6.800110313053847e-06, "epoch": 1.6273016562672233, "total_flos": 1553163615192514560, "step": 404500 }, { "loss": 3.35, "learning_rate": 6.799299187657916e-06, "epoch": 1.627703955811418, "total_flos": 1553556769276846080, "step": 404600 }, { "loss": 3.32, "learning_rate": 6.798488062261987e-06, "epoch": 1.6281062553556127, "total_flos": 1553943560492974080, "step": 404700 }, { "loss": 3.32875, "learning_rate": 6.797676936866056e-06, "epoch": 1.6285085548998073, "total_flos": 1554347390174208000, "step": 404800 }, { "loss": 3.33875, "learning_rate": 6.796865811470124e-06, "epoch": 1.628910854444002, "total_flos": 1554737761167605760, "step": 404900 }, { "loss": 3.28625, "learning_rate": 6.7960546860741935e-06, "epoch": 1.6293131539881966, "total_flos": 1555109861487697920, "step": 405000 }, { "loss": 3.33625, "learning_rate": 6.795243560678264e-06, "epoch": 1.6297154535323912, "total_flos": 1555501363775692800, "step": 405100 }, { "loss": 3.30875, "learning_rate": 6.794432435282334e-06, "epoch": 1.6301177530765858, "total_flos": 1555885292232253440, "step": 405200 }, { "loss": 3.30625, "learning_rate": 6.793621309886402e-06, "epoch": 1.6305200526207804, "total_flos": 1556266958099619840, "step": 405300 }, { "loss": 3.295, "learning_rate": 6.792810184490471e-06, "epoch": 1.630922352164975, "total_flos": 1556659782886932480, "step": 405400 }, { "loss": 3.35, "learning_rate": 6.791999059094542e-06, "epoch": 1.6313246517091695, "total_flos": 1557054195735674880, "step": 405500 }, { "loss": 3.3125, "learning_rate": 6.791187933698611e-06, "epoch": 1.631726951253364, "total_flos": 1557429631515893760, "step": 405600 }, { "loss": 3.3175, "learning_rate": 6.7903768083026796e-06, "epoch": 1.6321292507975589, "total_flos": 1557810001440153600, "step": 405700 }, { "loss": 3.35, "learning_rate": 6.789565682906749e-06, "epoch": 1.6325315503417535, "total_flos": 1558188039729070080, "step": 405800 }, { "loss": 3.33625, "learning_rate": 6.78875455751082e-06, "epoch": 1.632933849885948, "total_flos": 1558581188502159360, "step": 405900 }, { "loss": 3.28375, "learning_rate": 6.787943432114889e-06, "epoch": 1.6333361494301428, "total_flos": 1558957649352130560, "step": 406000 }, { "loss": 3.33375, "learning_rate": 6.787132306718957e-06, "epoch": 1.6337384489743374, "total_flos": 1559335809799618560, "step": 406100 }, { "loss": 3.295, "learning_rate": 6.786321181323026e-06, "epoch": 1.634140748518532, "total_flos": 1559714044604497920, "step": 406200 }, { "loss": 3.33625, "learning_rate": 6.785510055927097e-06, "epoch": 1.6345430480627265, "total_flos": 1560105775275909120, "step": 406300 }, { "loss": 3.31, "learning_rate": 6.7846989305311665e-06, "epoch": 1.6349453476069211, "total_flos": 1560500448375521280, "step": 406400 }, { "loss": 3.36, "learning_rate": 6.783887805135236e-06, "epoch": 1.6353476471511157, "total_flos": 1560876574617231360, "step": 406500 }, { "loss": 3.2925, "learning_rate": 6.783076679739304e-06, "epoch": 1.6357499466953103, "total_flos": 1561265214145658880, "step": 406600 }, { "loss": 3.3825, "learning_rate": 6.782265554343375e-06, "epoch": 1.636152246239505, "total_flos": 1561629480383447040, "step": 406700 }, { "loss": 3.30375, "learning_rate": 6.781454428947444e-06, "epoch": 1.6365545457836996, "total_flos": 1562007104395468800, "step": 406800 }, { "loss": 3.315, "learning_rate": 6.780643303551513e-06, "epoch": 1.6369568453278942, "total_flos": 1562402101480857600, "step": 406900 }, { "loss": 3.3225, "learning_rate": 6.779832178155582e-06, "epoch": 1.637359144872089, "total_flos": 1562790703830589440, "step": 407000 }, { "loss": 3.31, "learning_rate": 6.7790210527596525e-06, "epoch": 1.6377614444162836, "total_flos": 1563185902743183360, "step": 407100 }, { "loss": 3.285, "learning_rate": 6.778209927363722e-06, "epoch": 1.6381637439604781, "total_flos": 1563563404596633600, "step": 407200 }, { "loss": 3.3075, "learning_rate": 6.777398801967791e-06, "epoch": 1.6385660435046727, "total_flos": 1563949122941829120, "step": 407300 }, { "loss": 3.28875, "learning_rate": 6.776587676571859e-06, "epoch": 1.6389683430488673, "total_flos": 1564330486068387840, "step": 407400 }, { "loss": 3.31875, "learning_rate": 6.77577655117593e-06, "epoch": 1.6393706425930619, "total_flos": 1564720352493772800, "step": 407500 }, { "loss": 3.32875, "learning_rate": 6.774965425779999e-06, "epoch": 1.6397729421372564, "total_flos": 1565107032173813760, "step": 407600 }, { "loss": 3.3425, "learning_rate": 6.7741543003840686e-06, "epoch": 1.640175241681451, "total_flos": 1565487237449564160, "step": 407700 }, { "loss": 3.345, "learning_rate": 6.773343174988137e-06, "epoch": 1.6405775412256458, "total_flos": 1565872424670535680, "step": 407800 }, { "loss": 3.33125, "learning_rate": 6.772532049592208e-06, "epoch": 1.6409798407698404, "total_flos": 1566278161087733760, "step": 407900 }, { "loss": 3.33875, "learning_rate": 6.771720924196277e-06, "epoch": 1.6413821403140352, "total_flos": 1566659609194168320, "step": 408000 }, { "loss": 3.3075, "learning_rate": 6.770909798800346e-06, "epoch": 1.6417844398582297, "total_flos": 1567038932803706880, "step": 408100 }, { "loss": 3.32875, "learning_rate": 6.7700986734044145e-06, "epoch": 1.6421867394024243, "total_flos": 1567432851706920960, "step": 408200 }, { "loss": 3.2875, "learning_rate": 6.7692875480084854e-06, "epoch": 1.642589038946619, "total_flos": 1567810486341427200, "step": 408300 }, { "loss": 3.285, "learning_rate": 6.768476422612555e-06, "epoch": 1.6429913384908135, "total_flos": 1568202636600975360, "step": 408400 }, { "loss": 3.295, "learning_rate": 6.767665297216624e-06, "epoch": 1.643393638035008, "total_flos": 1568591451400396800, "step": 408500 }, { "loss": 3.35375, "learning_rate": 6.766854171820694e-06, "epoch": 1.6437959375792026, "total_flos": 1568973712126894080, "step": 408600 }, { "loss": 3.34125, "learning_rate": 6.766043046424763e-06, "epoch": 1.6441982371233972, "total_flos": 1569350337625374720, "step": 408700 }, { "loss": 3.26625, "learning_rate": 6.765231921028832e-06, "epoch": 1.644600536667592, "total_flos": 1569738137977528320, "step": 408800 }, { "loss": 3.35875, "learning_rate": 6.7644207956329015e-06, "epoch": 1.6450028362117866, "total_flos": 1570135291427266560, "step": 408900 }, { "loss": 3.325, "learning_rate": 6.7636096702369715e-06, "epoch": 1.6454051357559814, "total_flos": 1570512347136368640, "step": 409000 }, { "loss": 3.2625, "learning_rate": 6.762798544841041e-06, "epoch": 1.645807435300176, "total_flos": 1570891229912801280, "step": 409100 }, { "loss": 3.335, "learning_rate": 6.76198741944511e-06, "epoch": 1.6462097348443705, "total_flos": 1571292127788318720, "step": 409200 }, { "loss": 3.23125, "learning_rate": 6.761176294049179e-06, "epoch": 1.646612034388565, "total_flos": 1571672497712578560, "step": 409300 }, { "loss": 3.34, "learning_rate": 6.760365168653249e-06, "epoch": 1.6470143339327596, "total_flos": 1572051449535160320, "step": 409400 }, { "loss": 3.345, "learning_rate": 6.759554043257318e-06, "epoch": 1.6474166334769542, "total_flos": 1572440875127439360, "step": 409500 }, { "loss": 3.28125, "learning_rate": 6.7587429178613875e-06, "epoch": 1.6478189330211488, "total_flos": 1572808641473863680, "step": 409600 }, { "loss": 3.32875, "learning_rate": 6.757931792465457e-06, "epoch": 1.6482212325653434, "total_flos": 1573189521277378560, "step": 409700 }, { "loss": 3.32125, "learning_rate": 6.757120667069527e-06, "epoch": 1.6486235321095382, "total_flos": 1573569052025364480, "step": 409800 }, { "loss": 3.315, "learning_rate": 6.756309541673596e-06, "epoch": 1.6490258316537327, "total_flos": 1573948147251486720, "step": 409900 }, { "loss": 3.3275, "learning_rate": 6.755498416277665e-06, "epoch": 1.6494281311979273, "total_flos": 1574341465984327680, "step": 410000 }, { "loss": 3.33875, "learning_rate": 6.754687290881734e-06, "epoch": 1.649830430742122, "total_flos": 1574720816150077440, "step": 410100 }, { "loss": 3.325, "learning_rate": 6.753876165485804e-06, "epoch": 1.6502327302863167, "total_flos": 1575104590580613120, "step": 410200 }, { "loss": 3.28, "learning_rate": 6.753065040089874e-06, "epoch": 1.6506350298305112, "total_flos": 1575487329318912000, "step": 410300 }, { "loss": 3.32, "learning_rate": 6.752253914693943e-06, "epoch": 1.6510373293747058, "total_flos": 1575886570086850560, "step": 410400 }, { "loss": 3.3475, "learning_rate": 6.751442789298012e-06, "epoch": 1.6514396289189004, "total_flos": 1576268379357757440, "step": 410500 }, { "loss": 3.2475, "learning_rate": 6.750631663902082e-06, "epoch": 1.651841928463095, "total_flos": 1576646162707046400, "step": 410600 }, { "loss": 3.2775, "learning_rate": 6.749820538506151e-06, "epoch": 1.6522442280072895, "total_flos": 1577050959034368000, "step": 410700 }, { "loss": 3.31875, "learning_rate": 6.74900941311022e-06, "epoch": 1.6526465275514843, "total_flos": 1577427754492600320, "step": 410800 }, { "loss": 3.36625, "learning_rate": 6.74819828771429e-06, "epoch": 1.653048827095679, "total_flos": 1577814173921771520, "step": 410900 }, { "loss": 3.31125, "learning_rate": 6.74738716231836e-06, "epoch": 1.6534511266398735, "total_flos": 1578190055846338560, "step": 411000 }, { "loss": 3.29625, "learning_rate": 6.746576036922429e-06, "epoch": 1.6538534261840683, "total_flos": 1578572215659233280, "step": 411100 }, { "loss": 3.335, "learning_rate": 6.745764911526498e-06, "epoch": 1.6542557257282628, "total_flos": 1578956181294489600, "step": 411200 }, { "loss": 3.30625, "learning_rate": 6.744953786130567e-06, "epoch": 1.6546580252724574, "total_flos": 1579331059394273280, "step": 411300 }, { "loss": 3.28125, "learning_rate": 6.744142660734637e-06, "epoch": 1.655060324816652, "total_flos": 1579722715708293120, "step": 411400 }, { "loss": 3.33375, "learning_rate": 6.7433315353387065e-06, "epoch": 1.6554626243608466, "total_flos": 1580116592121569280, "step": 411500 }, { "loss": 3.31625, "learning_rate": 6.742520409942776e-06, "epoch": 1.6558649239050411, "total_flos": 1580504955465400320, "step": 411600 }, { "loss": 3.2825, "learning_rate": 6.741709284546845e-06, "epoch": 1.6562672234492357, "total_flos": 1580879344930897920, "step": 411700 }, { "loss": 3.28125, "learning_rate": 6.740898159150915e-06, "epoch": 1.6566695229934303, "total_flos": 1581236456925388800, "step": 411800 }, { "loss": 3.3725, "learning_rate": 6.740087033754984e-06, "epoch": 1.657071822537625, "total_flos": 1581635989811650560, "step": 411900 }, { "loss": 3.31625, "learning_rate": 6.739275908359053e-06, "epoch": 1.6574741220818197, "total_flos": 1582013895319511040, "step": 412000 }, { "loss": 3.33125, "learning_rate": 6.7384647829631225e-06, "epoch": 1.6578764216260145, "total_flos": 1582402285219553280, "step": 412100 }, { "loss": 3.30625, "learning_rate": 6.7376536575671925e-06, "epoch": 1.658278721170209, "total_flos": 1582791121263943680, "step": 412200 }, { "loss": 3.29625, "learning_rate": 6.736842532171262e-06, "epoch": 1.6586810207144036, "total_flos": 1583167789252362240, "step": 412300 }, { "loss": 3.27, "learning_rate": 6.736031406775331e-06, "epoch": 1.6590833202585982, "total_flos": 1583553364194017280, "step": 412400 }, { "loss": 3.36, "learning_rate": 6.7352202813794e-06, "epoch": 1.6594856198027927, "total_flos": 1583937287339335680, "step": 412500 }, { "loss": 3.3075, "learning_rate": 6.73440915598347e-06, "epoch": 1.6598879193469873, "total_flos": 1584331211553792000, "step": 412600 }, { "loss": 3.27125, "learning_rate": 6.733598030587539e-06, "epoch": 1.660290218891182, "total_flos": 1584728662433095680, "step": 412700 }, { "loss": 3.32375, "learning_rate": 6.7327869051916086e-06, "epoch": 1.6606925184353765, "total_flos": 1585137952071352320, "step": 412800 }, { "loss": 3.3075, "learning_rate": 6.731975779795678e-06, "epoch": 1.6610948179795713, "total_flos": 1585525720556052480, "step": 412900 }, { "loss": 3.26625, "learning_rate": 6.731164654399748e-06, "epoch": 1.6614971175237658, "total_flos": 1585888844876759040, "step": 413000 }, { "loss": 3.30375, "learning_rate": 6.730353529003817e-06, "epoch": 1.6618994170679606, "total_flos": 1586283608267489280, "step": 413100 }, { "loss": 3.265, "learning_rate": 6.729542403607886e-06, "epoch": 1.6623017166121552, "total_flos": 1586672380576972800, "step": 413200 }, { "loss": 3.3175, "learning_rate": 6.728731278211956e-06, "epoch": 1.6627040161563498, "total_flos": 1587073868000378880, "step": 413300 }, { "loss": 3.2375, "learning_rate": 6.7279201528160254e-06, "epoch": 1.6631063157005443, "total_flos": 1587447742275379200, "step": 413400 }, { "loss": 3.28125, "learning_rate": 6.727109027420095e-06, "epoch": 1.663508615244739, "total_flos": 1587825286618767360, "step": 413500 }, { "loss": 3.3025, "learning_rate": 6.726297902024164e-06, "epoch": 1.6639109147889335, "total_flos": 1588215317692661760, "step": 413600 }, { "loss": 3.30625, "learning_rate": 6.725486776628234e-06, "epoch": 1.664313214333128, "total_flos": 1588595799153008640, "step": 413700 }, { "loss": 3.34625, "learning_rate": 6.724675651232303e-06, "epoch": 1.6647155138773226, "total_flos": 1588971070284718080, "step": 413800 }, { "loss": 3.30375, "learning_rate": 6.723864525836372e-06, "epoch": 1.6651178134215174, "total_flos": 1589355381150720000, "step": 413900 }, { "loss": 3.2975, "learning_rate": 6.7230534004404415e-06, "epoch": 1.665520112965712, "total_flos": 1589742363571568640, "step": 414000 }, { "loss": 3.24875, "learning_rate": 6.7222422750445115e-06, "epoch": 1.6659224125099066, "total_flos": 1590122223616573440, "step": 414100 }, { "loss": 3.29375, "learning_rate": 6.721431149648581e-06, "epoch": 1.6663247120541014, "total_flos": 1590495210914119680, "step": 414200 }, { "loss": 3.295, "learning_rate": 6.72062002425265e-06, "epoch": 1.666727011598296, "total_flos": 1590875681751982080, "step": 414300 }, { "loss": 3.3525, "learning_rate": 6.719808898856719e-06, "epoch": 1.6671293111424905, "total_flos": 1591258069948293120, "step": 414400 }, { "loss": 3.2825, "learning_rate": 6.718997773460789e-06, "epoch": 1.667531610686685, "total_flos": 1591634143077580800, "step": 414500 }, { "loss": 3.275, "learning_rate": 6.718186648064858e-06, "epoch": 1.6679339102308797, "total_flos": 1592020047316254720, "step": 414600 }, { "loss": 3.35625, "learning_rate": 6.7173755226689275e-06, "epoch": 1.6683362097750742, "total_flos": 1592402297420267520, "step": 414700 }, { "loss": 3.27375, "learning_rate": 6.716564397272997e-06, "epoch": 1.6687385093192688, "total_flos": 1592775513101230080, "step": 414800 }, { "loss": 3.31875, "learning_rate": 6.715753271877067e-06, "epoch": 1.6691408088634636, "total_flos": 1593143640612126720, "step": 414900 }, { "loss": 3.34, "learning_rate": 6.714942146481136e-06, "epoch": 1.6695431084076582, "total_flos": 1593531828684963840, "step": 415000 }, { "loss": 3.24875, "learning_rate": 6.714131021085205e-06, "epoch": 1.6699454079518528, "total_flos": 1593928344785633280, "step": 415100 }, { "loss": 3.34125, "learning_rate": 6.713319895689274e-06, "epoch": 1.6703477074960476, "total_flos": 1594319682425118720, "step": 415200 }, { "loss": 3.27875, "learning_rate": 6.712508770293344e-06, "epoch": 1.6707500070402421, "total_flos": 1594703016022548480, "step": 415300 }, { "loss": 3.32, "learning_rate": 6.711697644897414e-06, "epoch": 1.6711523065844367, "total_flos": 1595091666173460480, "step": 415400 }, { "loss": 3.31125, "learning_rate": 6.710886519501483e-06, "epoch": 1.6715546061286313, "total_flos": 1595480470350397440, "step": 415500 }, { "loss": 3.3175, "learning_rate": 6.710075394105552e-06, "epoch": 1.6719569056728258, "total_flos": 1595865923133480960, "step": 415600 }, { "loss": 3.3175, "learning_rate": 6.709264268709622e-06, "epoch": 1.6723592052170204, "total_flos": 1596261339807006720, "step": 415700 }, { "loss": 3.285, "learning_rate": 6.708453143313691e-06, "epoch": 1.672761504761215, "total_flos": 1596650074937794560, "step": 415800 }, { "loss": 3.315, "learning_rate": 6.70764201791776e-06, "epoch": 1.6731638043054096, "total_flos": 1597036972378767360, "step": 415900 }, { "loss": 3.31125, "learning_rate": 6.70683089252183e-06, "epoch": 1.6735661038496044, "total_flos": 1597420964570234880, "step": 416000 }, { "loss": 3.325, "learning_rate": 6.7060197671259e-06, "epoch": 1.673968403393799, "total_flos": 1597810299871395840, "step": 416100 }, { "loss": 3.33625, "learning_rate": 6.705208641729969e-06, "epoch": 1.6743707029379937, "total_flos": 1598195083437957120, "step": 416200 }, { "loss": 3.25625, "learning_rate": 6.704397516334038e-06, "epoch": 1.6747730024821883, "total_flos": 1598581056722780160, "step": 416300 }, { "loss": 3.27375, "learning_rate": 6.703586390938107e-06, "epoch": 1.6751753020263829, "total_flos": 1598979941637488640, "step": 416400 }, { "loss": 3.3, "learning_rate": 6.702775265542177e-06, "epoch": 1.6755776015705774, "total_flos": 1599341419473100800, "step": 416500 }, { "loss": 3.3325, "learning_rate": 6.7019641401462465e-06, "epoch": 1.675979901114772, "total_flos": 1599713461369528320, "step": 416600 }, { "loss": 3.2775, "learning_rate": 6.701153014750316e-06, "epoch": 1.6763822006589666, "total_flos": 1600127435523440640, "step": 416700 }, { "loss": 3.3225, "learning_rate": 6.700341889354385e-06, "epoch": 1.6767845002031612, "total_flos": 1600537272219648000, "step": 416800 }, { "loss": 3.3275, "learning_rate": 6.699530763958455e-06, "epoch": 1.6771867997473557, "total_flos": 1600930245721743360, "step": 416900 }, { "loss": 3.2875, "learning_rate": 6.698719638562524e-06, "epoch": 1.6775890992915505, "total_flos": 1601322231332782080, "step": 417000 }, { "loss": 3.30875, "learning_rate": 6.697908513166593e-06, "epoch": 1.677991398835745, "total_flos": 1601721307452211200, "step": 417100 }, { "loss": 3.3425, "learning_rate": 6.6970973877706625e-06, "epoch": 1.6783936983799397, "total_flos": 1602096674186280960, "step": 417200 }, { "loss": 3.26, "learning_rate": 6.6962862623747325e-06, "epoch": 1.6787959979241345, "total_flos": 1602491108279992320, "step": 417300 }, { "loss": 3.28375, "learning_rate": 6.695475136978802e-06, "epoch": 1.679198297468329, "total_flos": 1602868333948846080, "step": 417400 }, { "loss": 3.28, "learning_rate": 6.694664011582871e-06, "epoch": 1.6796005970125236, "total_flos": 1603249357155901440, "step": 417500 }, { "loss": 3.30875, "learning_rate": 6.69385288618694e-06, "epoch": 1.6800028965567182, "total_flos": 1603628452382023680, "step": 417600 }, { "loss": 3.3275, "learning_rate": 6.69304176079101e-06, "epoch": 1.6804051961009128, "total_flos": 1604011207054049280, "step": 417700 }, { "loss": 3.3125, "learning_rate": 6.692230635395079e-06, "epoch": 1.6808074956451073, "total_flos": 1604402534071050240, "step": 417800 }, { "loss": 3.26625, "learning_rate": 6.6914195099991486e-06, "epoch": 1.681209795189302, "total_flos": 1604776950092759040, "step": 417900 }, { "loss": 3.26625, "learning_rate": 6.690608384603219e-06, "epoch": 1.6816120947334967, "total_flos": 1605156953541304320, "step": 418000 }, { "loss": 3.2825, "learning_rate": 6.689797259207288e-06, "epoch": 1.6820143942776913, "total_flos": 1605522096134062080, "step": 418100 }, { "loss": 3.305, "learning_rate": 6.688986133811357e-06, "epoch": 1.6824166938218859, "total_flos": 1605894658532229120, "step": 418200 }, { "loss": 3.365, "learning_rate": 6.688175008415426e-06, "epoch": 1.6828189933660807, "total_flos": 1606284795830968320, "step": 418300 }, { "loss": 3.32125, "learning_rate": 6.687363883019496e-06, "epoch": 1.6832212929102752, "total_flos": 1606667688595292160, "step": 418400 }, { "loss": 3.30875, "learning_rate": 6.6865527576235654e-06, "epoch": 1.6836235924544698, "total_flos": 1607055557993594880, "step": 418500 }, { "loss": 3.28875, "learning_rate": 6.685741632227635e-06, "epoch": 1.6840258919986644, "total_flos": 1607451298652897280, "step": 418600 }, { "loss": 3.2875, "learning_rate": 6.684930506831704e-06, "epoch": 1.684428191542859, "total_flos": 1607821449747087360, "step": 418700 }, { "loss": 3.3575, "learning_rate": 6.684119381435774e-06, "epoch": 1.6848304910870535, "total_flos": 1608199716419420160, "step": 418800 }, { "loss": 3.32125, "learning_rate": 6.683308256039843e-06, "epoch": 1.685232790631248, "total_flos": 1608569012403609600, "step": 418900 }, { "loss": 3.345, "learning_rate": 6.682497130643912e-06, "epoch": 1.6856350901754429, "total_flos": 1608951825499299840, "step": 419000 }, { "loss": 3.2975, "learning_rate": 6.6816860052479815e-06, "epoch": 1.6860373897196375, "total_flos": 1609336231967662080, "step": 419100 }, { "loss": 3.26625, "learning_rate": 6.6808748798520515e-06, "epoch": 1.686439689263832, "total_flos": 1609731531793858560, "step": 419200 }, { "loss": 3.31625, "learning_rate": 6.680063754456121e-06, "epoch": 1.6868419888080268, "total_flos": 1610109920624762880, "step": 419300 }, { "loss": 3.32375, "learning_rate": 6.67925262906019e-06, "epoch": 1.6872442883522214, "total_flos": 1610480135453859840, "step": 419400 }, { "loss": 3.30625, "learning_rate": 6.678441503664259e-06, "epoch": 1.687646587896416, "total_flos": 1610854747991531520, "step": 419500 }, { "loss": 3.3275, "learning_rate": 6.677630378268329e-06, "epoch": 1.6880488874406105, "total_flos": 1611240227330826240, "step": 419600 }, { "loss": 3.30875, "learning_rate": 6.676819252872398e-06, "epoch": 1.6884511869848051, "total_flos": 1611614866424709120, "step": 419700 }, { "loss": 3.285, "learning_rate": 6.6760081274764675e-06, "epoch": 1.6888534865289997, "total_flos": 1612008296693637120, "step": 419800 }, { "loss": 3.31625, "learning_rate": 6.675197002080537e-06, "epoch": 1.6892557860731943, "total_flos": 1612380646642114560, "step": 419900 }, { "loss": 3.27375, "learning_rate": 6.674385876684607e-06, "epoch": 1.6896580856173888, "total_flos": 1612761446776995840, "step": 420000 }, { "loss": 3.275, "learning_rate": 6.673574751288676e-06, "epoch": 1.6900603851615836, "total_flos": 1613153358030643200, "step": 420100 }, { "loss": 3.285, "learning_rate": 6.672763625892745e-06, "epoch": 1.6904626847057782, "total_flos": 1613528459202600960, "step": 420200 }, { "loss": 3.355, "learning_rate": 6.671952500496814e-06, "epoch": 1.690864984249973, "total_flos": 1613906003545989120, "step": 420300 }, { "loss": 3.3325, "learning_rate": 6.671141375100884e-06, "epoch": 1.6912672837941676, "total_flos": 1614292826629570560, "step": 420400 }, { "loss": 3.3075, "learning_rate": 6.670330249704954e-06, "epoch": 1.6916695833383621, "total_flos": 1614682087573340160, "step": 420500 }, { "loss": 3.22, "learning_rate": 6.669519124309023e-06, "epoch": 1.6920718828825567, "total_flos": 1615070966107668480, "step": 420600 }, { "loss": 3.3175, "learning_rate": 6.668707998913092e-06, "epoch": 1.6924741824267513, "total_flos": 1615461714199265280, "step": 420700 }, { "loss": 3.28375, "learning_rate": 6.667896873517162e-06, "epoch": 1.6928764819709459, "total_flos": 1615846120667627520, "step": 420800 }, { "loss": 3.27125, "learning_rate": 6.667085748121231e-06, "epoch": 1.6932787815151404, "total_flos": 1616228917829591040, "step": 420900 }, { "loss": 3.30625, "learning_rate": 6.6662746227253e-06, "epoch": 1.693681081059335, "total_flos": 1616612331095654400, "step": 421000 }, { "loss": 3.315, "learning_rate": 6.66546349732937e-06, "epoch": 1.6940833806035298, "total_flos": 1617000604148367360, "step": 421100 }, { "loss": 3.25875, "learning_rate": 6.66465237193344e-06, "epoch": 1.6944856801477244, "total_flos": 1617374823654113280, "step": 421200 }, { "loss": 3.27625, "learning_rate": 6.663841246537509e-06, "epoch": 1.694887979691919, "total_flos": 1617754779301478400, "step": 421300 }, { "loss": 3.3025, "learning_rate": 6.663030121141578e-06, "epoch": 1.6952902792361138, "total_flos": 1618142834593259520, "step": 421400 }, { "loss": 3.28875, "learning_rate": 6.662218995745647e-06, "epoch": 1.6956925787803083, "total_flos": 1618536472000634880, "step": 421500 }, { "loss": 3.2725, "learning_rate": 6.661407870349717e-06, "epoch": 1.696094878324503, "total_flos": 1618935510941368320, "step": 421600 }, { "loss": 3.28125, "learning_rate": 6.6605967449537865e-06, "epoch": 1.6964971778686975, "total_flos": 1619316773154324480, "step": 421700 }, { "loss": 3.3325, "learning_rate": 6.659785619557856e-06, "epoch": 1.696899477412892, "total_flos": 1619699336621629440, "step": 421800 }, { "loss": 3.34625, "learning_rate": 6.658974494161925e-06, "epoch": 1.6973017769570866, "total_flos": 1620091439079997440, "step": 421900 }, { "loss": 3.31125, "learning_rate": 6.658163368765995e-06, "epoch": 1.6977040765012812, "total_flos": 1620485618234081280, "step": 422000 }, { "loss": 3.32125, "learning_rate": 6.657352243370064e-06, "epoch": 1.698106376045476, "total_flos": 1620883313430528000, "step": 422100 }, { "loss": 3.26, "learning_rate": 6.656541117974133e-06, "epoch": 1.6985086755896706, "total_flos": 1621271793621688320, "step": 422200 }, { "loss": 3.2975, "learning_rate": 6.6557299925782025e-06, "epoch": 1.6989109751338651, "total_flos": 1621655212198993920, "step": 422300 }, { "loss": 3.2875, "learning_rate": 6.6549188671822725e-06, "epoch": 1.69931327467806, "total_flos": 1622036049512570880, "step": 422400 }, { "loss": 3.28375, "learning_rate": 6.654107741786342e-06, "epoch": 1.6997155742222545, "total_flos": 1622427201258577920, "step": 422500 }, { "loss": 3.32875, "learning_rate": 6.653296616390411e-06, "epoch": 1.700117873766449, "total_flos": 1622812728399052800, "step": 422600 }, { "loss": 3.3, "learning_rate": 6.652485490994481e-06, "epoch": 1.7005201733106436, "total_flos": 1623200603108597760, "step": 422700 }, { "loss": 3.31375, "learning_rate": 6.65167436559855e-06, "epoch": 1.7009224728548382, "total_flos": 1623602175511879680, "step": 422800 }, { "loss": 3.30875, "learning_rate": 6.650863240202619e-06, "epoch": 1.7013247723990328, "total_flos": 1623978312376074240, "step": 422900 }, { "loss": 3.3225, "learning_rate": 6.6500521148066886e-06, "epoch": 1.7017270719432274, "total_flos": 1624348978660761600, "step": 423000 }, { "loss": 3.32375, "learning_rate": 6.649240989410759e-06, "epoch": 1.702129371487422, "total_flos": 1624721456079052800, "step": 423100 }, { "loss": 3.26625, "learning_rate": 6.648429864014828e-06, "epoch": 1.7025316710316167, "total_flos": 1625111593377792000, "step": 423200 }, { "loss": 3.33, "learning_rate": 6.647618738618897e-06, "epoch": 1.7029339705758113, "total_flos": 1625476512898375680, "step": 423300 }, { "loss": 3.3175, "learning_rate": 6.646807613222966e-06, "epoch": 1.703336270120006, "total_flos": 1625864398230405120, "step": 423400 }, { "loss": 3.29125, "learning_rate": 6.645996487827036e-06, "epoch": 1.7037385696642007, "total_flos": 1626221164994150400, "step": 423500 }, { "loss": 3.3475, "learning_rate": 6.6451853624311054e-06, "epoch": 1.7041408692083952, "total_flos": 1626616050543452160, "step": 423600 }, { "loss": 3.27375, "learning_rate": 6.644374237035175e-06, "epoch": 1.7045431687525898, "total_flos": 1626993244344852480, "step": 423700 }, { "loss": 3.25625, "learning_rate": 6.643563111639244e-06, "epoch": 1.7049454682967844, "total_flos": 1627376163665387520, "step": 423800 }, { "loss": 3.3025, "learning_rate": 6.642751986243315e-06, "epoch": 1.705347767840979, "total_flos": 1627761308396421120, "step": 423900 }, { "loss": 3.32875, "learning_rate": 6.641940860847383e-06, "epoch": 1.7057500673851735, "total_flos": 1628137179698503680, "step": 424000 }, { "loss": 3.3025, "learning_rate": 6.641129735451452e-06, "epoch": 1.7061523669293681, "total_flos": 1628515255166115840, "step": 424100 }, { "loss": 3.29375, "learning_rate": 6.6403186100555215e-06, "epoch": 1.706554666473563, "total_flos": 1628893601507082240, "step": 424200 }, { "loss": 3.2925, "learning_rate": 6.639507484659592e-06, "epoch": 1.7069569660177575, "total_flos": 1629272404614881280, "step": 424300 }, { "loss": 3.28125, "learning_rate": 6.638696359263661e-06, "epoch": 1.7073592655619523, "total_flos": 1629675081756549120, "step": 424400 }, { "loss": 3.22125, "learning_rate": 6.63788523386773e-06, "epoch": 1.7077615651061469, "total_flos": 1630072957535232000, "step": 424500 }, { "loss": 3.26625, "learning_rate": 6.637074108471799e-06, "epoch": 1.7081638646503414, "total_flos": 1630461703288504320, "step": 424600 }, { "loss": 3.30375, "learning_rate": 6.63626298307587e-06, "epoch": 1.708566164194536, "total_flos": 1630850013519912960, "step": 424700 }, { "loss": 3.2625, "learning_rate": 6.635451857679938e-06, "epoch": 1.7089684637387306, "total_flos": 1631219893740748800, "step": 424800 }, { "loss": 3.29625, "learning_rate": 6.6346407322840075e-06, "epoch": 1.7093707632829251, "total_flos": 1631612033377812480, "step": 424900 }, { "loss": 3.28, "learning_rate": 6.633829606888077e-06, "epoch": 1.7097730628271197, "total_flos": 1631985126900203520, "step": 425000 }, { "loss": 3.27375, "learning_rate": 6.633018481492148e-06, "epoch": 1.7101753623713143, "total_flos": 1632356781075947520, "step": 425100 }, { "loss": 3.22625, "learning_rate": 6.632207356096216e-06, "epoch": 1.710577661915509, "total_flos": 1632741208789278720, "step": 425200 }, { "loss": 3.265, "learning_rate": 6.631396230700285e-06, "epoch": 1.7109799614597037, "total_flos": 1633121711494594560, "step": 425300 }, { "loss": 3.24625, "learning_rate": 6.630585105304354e-06, "epoch": 1.7113822610038982, "total_flos": 1633500604893511680, "step": 425400 }, { "loss": 3.27, "learning_rate": 6.629773979908425e-06, "epoch": 1.711784560548093, "total_flos": 1633894454750576640, "step": 425500 }, { "loss": 3.28, "learning_rate": 6.628962854512494e-06, "epoch": 1.7121868600922876, "total_flos": 1634272280589803520, "step": 425600 }, { "loss": 3.24, "learning_rate": 6.628151729116563e-06, "epoch": 1.7125891596364822, "total_flos": 1634663512004444160, "step": 425700 }, { "loss": 3.32375, "learning_rate": 6.627340603720632e-06, "epoch": 1.7129914591806767, "total_flos": 1635046665019637760, "step": 425800 }, { "loss": 3.275, "learning_rate": 6.626529478324703e-06, "epoch": 1.7133937587248713, "total_flos": 1635417549065256960, "step": 425900 }, { "loss": 3.28, "learning_rate": 6.625718352928771e-06, "epoch": 1.713796058269066, "total_flos": 1635806937478840320, "step": 426000 }, { "loss": 3.3125, "learning_rate": 6.62490722753284e-06, "epoch": 1.7141983578132605, "total_flos": 1636200250900439040, "step": 426100 }, { "loss": 3.26875, "learning_rate": 6.62409610213691e-06, "epoch": 1.7146006573574553, "total_flos": 1636579319570350080, "step": 426200 }, { "loss": 3.3175, "learning_rate": 6.6232849767409805e-06, "epoch": 1.7150029569016498, "total_flos": 1636956454948085760, "step": 426300 }, { "loss": 3.28, "learning_rate": 6.62247385134505e-06, "epoch": 1.7154052564458444, "total_flos": 1637336320304332800, "step": 426400 }, { "loss": 3.29375, "learning_rate": 6.621662725949118e-06, "epoch": 1.7158075559900392, "total_flos": 1637715325239336960, "step": 426500 }, { "loss": 3.28125, "learning_rate": 6.620851600553187e-06, "epoch": 1.7162098555342338, "total_flos": 1638108882978078720, "step": 426600 }, { "loss": 3.29375, "learning_rate": 6.620040475157258e-06, "epoch": 1.7166121550784283, "total_flos": 1638503513587752960, "step": 426700 }, { "loss": 3.305, "learning_rate": 6.619229349761327e-06, "epoch": 1.717014454622623, "total_flos": 1638880420582072320, "step": 426800 }, { "loss": 3.26375, "learning_rate": 6.618418224365396e-06, "epoch": 1.7174167541668175, "total_flos": 1639260944532357120, "step": 426900 }, { "loss": 3.2775, "learning_rate": 6.617607098969465e-06, "epoch": 1.717819053711012, "total_flos": 1639652606157619200, "step": 427000 }, { "loss": 3.24625, "learning_rate": 6.616795973573536e-06, "epoch": 1.7182213532552066, "total_flos": 1640054768108789760, "step": 427100 }, { "loss": 3.31875, "learning_rate": 6.615984848177605e-06, "epoch": 1.7186236527994012, "total_flos": 1640440369606656000, "step": 427200 }, { "loss": 3.30125, "learning_rate": 6.615173722781673e-06, "epoch": 1.719025952343596, "total_flos": 1640824218394583040, "step": 427300 }, { "loss": 3.285, "learning_rate": 6.614362597385744e-06, "epoch": 1.7194282518877906, "total_flos": 1641193551557468160, "step": 427400 }, { "loss": 3.265, "learning_rate": 6.613551471989813e-06, "epoch": 1.7198305514319854, "total_flos": 1641571318973030400, "step": 427500 }, { "loss": 3.2775, "learning_rate": 6.612740346593883e-06, "epoch": 1.72023285097618, "total_flos": 1641948624310517760, "step": 427600 }, { "loss": 3.3375, "learning_rate": 6.611929221197951e-06, "epoch": 1.7206351505203745, "total_flos": 1642335612042608640, "step": 427700 }, { "loss": 3.3275, "learning_rate": 6.611118095802022e-06, "epoch": 1.721037450064569, "total_flos": 1642707669872762880, "step": 427800 }, { "loss": 3.315, "learning_rate": 6.610306970406091e-06, "epoch": 1.7214397496087637, "total_flos": 1643096946750259200, "step": 427900 }, { "loss": 3.28125, "learning_rate": 6.60949584501016e-06, "epoch": 1.7218420491529582, "total_flos": 1643488395925831680, "step": 428000 }, { "loss": 3.3, "learning_rate": 6.6086847196142286e-06, "epoch": 1.7222443486971528, "total_flos": 1643876605243637760, "step": 428100 }, { "loss": 3.29, "learning_rate": 6.6078735942182995e-06, "epoch": 1.7226466482413474, "total_flos": 1644266779721072640, "step": 428200 }, { "loss": 3.28, "learning_rate": 6.607062468822369e-06, "epoch": 1.7230489477855422, "total_flos": 1644665250358886400, "step": 428300 }, { "loss": 3.31, "learning_rate": 6.606251343426438e-06, "epoch": 1.7234512473297368, "total_flos": 1645060528940113920, "step": 428400 }, { "loss": 3.3, "learning_rate": 6.605440218030506e-06, "epoch": 1.7238535468739316, "total_flos": 1645446396000092160, "step": 428500 }, { "loss": 3.265, "learning_rate": 6.604629092634577e-06, "epoch": 1.7242558464181261, "total_flos": 1645836129644421120, "step": 428600 }, { "loss": 3.3175, "learning_rate": 6.603817967238646e-06, "epoch": 1.7246581459623207, "total_flos": 1646218751535390720, "step": 428700 }, { "loss": 3.295, "learning_rate": 6.6030068418427155e-06, "epoch": 1.7250604455065153, "total_flos": 1646603933445120000, "step": 428800 }, { "loss": 3.2825, "learning_rate": 6.602195716446784e-06, "epoch": 1.7254627450507098, "total_flos": 1646997868282060800, "step": 428900 }, { "loss": 3.245, "learning_rate": 6.601384591050855e-06, "epoch": 1.7258650445949044, "total_flos": 1647386056354897920, "step": 429000 }, { "loss": 3.24, "learning_rate": 6.600573465654924e-06, "epoch": 1.726267344139099, "total_flos": 1647782487475691520, "step": 429100 }, { "loss": 3.3, "learning_rate": 6.599762340258993e-06, "epoch": 1.7266696436832936, "total_flos": 1648172098961448960, "step": 429200 }, { "loss": 3.25125, "learning_rate": 6.598951214863062e-06, "epoch": 1.7270719432274884, "total_flos": 1648557445519687680, "step": 429300 }, { "loss": 3.2275, "learning_rate": 6.598140089467132e-06, "epoch": 1.727474242771683, "total_flos": 1648932307685744640, "step": 429400 }, { "loss": 3.24625, "learning_rate": 6.5973289640712015e-06, "epoch": 1.7278765423158775, "total_flos": 1649304195556147200, "step": 429500 }, { "loss": 3.28375, "learning_rate": 6.596517838675271e-06, "epoch": 1.7282788418600723, "total_flos": 1649682563142082560, "step": 429600 }, { "loss": 3.3325, "learning_rate": 6.59570671327934e-06, "epoch": 1.7286811414042669, "total_flos": 1650076444866600960, "step": 429700 }, { "loss": 3.2875, "learning_rate": 6.59489558788341e-06, "epoch": 1.7290834409484614, "total_flos": 1650463607869685760, "step": 429800 }, { "loss": 3.24, "learning_rate": 6.594084462487479e-06, "epoch": 1.729485740492656, "total_flos": 1650851174527180800, "step": 429900 }, { "loss": 3.27, "learning_rate": 6.593273337091548e-06, "epoch": 1.7298880400368506, "total_flos": 1651228771982991360, "step": 430000 }, { "loss": 3.2525, "learning_rate": 6.5924622116956176e-06, "epoch": 1.7302903395810452, "total_flos": 1651593893330780160, "step": 430100 }, { "loss": 3.2325, "learning_rate": 6.591651086299688e-06, "epoch": 1.7306926391252397, "total_flos": 1651970540074229760, "step": 430200 }, { "loss": 3.24, "learning_rate": 6.590839960903757e-06, "epoch": 1.7310949386694345, "total_flos": 1652355148369797120, "step": 430300 }, { "loss": 3.2625, "learning_rate": 6.590028835507826e-06, "epoch": 1.731497238213629, "total_flos": 1652731455193743360, "step": 430400 }, { "loss": 3.2975, "learning_rate": 6.589217710111895e-06, "epoch": 1.7318995377578237, "total_flos": 1653123393003601920, "step": 430500 }, { "loss": 3.2975, "learning_rate": 6.588406584715965e-06, "epoch": 1.7323018373020185, "total_flos": 1653491552381952000, "step": 430600 }, { "loss": 3.29875, "learning_rate": 6.5875954593200344e-06, "epoch": 1.732704136846213, "total_flos": 1653865368233287680, "step": 430700 }, { "loss": 3.2525, "learning_rate": 6.586784333924104e-06, "epoch": 1.7331064363904076, "total_flos": 1654248531870965760, "step": 430800 }, { "loss": 3.295, "learning_rate": 6.585973208528173e-06, "epoch": 1.7335087359346022, "total_flos": 1654625231726837760, "step": 430900 }, { "loss": 3.30125, "learning_rate": 6.585162083132243e-06, "epoch": 1.7339110354787968, "total_flos": 1654999536212459520, "step": 431000 }, { "loss": 3.26125, "learning_rate": 6.584350957736312e-06, "epoch": 1.7343133350229913, "total_flos": 1655388600640266240, "step": 431100 }, { "loss": 3.24625, "learning_rate": 6.583539832340381e-06, "epoch": 1.734715634567186, "total_flos": 1655776475349811200, "step": 431200 }, { "loss": 3.245, "learning_rate": 6.5827287069444505e-06, "epoch": 1.7351179341113805, "total_flos": 1656166867588177920, "step": 431300 }, { "loss": 3.30625, "learning_rate": 6.5819175815485205e-06, "epoch": 1.7355202336555753, "total_flos": 1656551072229335040, "step": 431400 }, { "loss": 3.29125, "learning_rate": 6.58110645615259e-06, "epoch": 1.7359225331997699, "total_flos": 1656940354418073600, "step": 431500 }, { "loss": 3.3, "learning_rate": 6.580295330756659e-06, "epoch": 1.7363248327439647, "total_flos": 1657318945076183040, "step": 431600 }, { "loss": 3.26, "learning_rate": 6.579484205360728e-06, "epoch": 1.7367271322881592, "total_flos": 1657695937050378240, "step": 431700 }, { "loss": 3.35, "learning_rate": 6.578673079964798e-06, "epoch": 1.7371294318323538, "total_flos": 1658073922226872320, "step": 431800 }, { "loss": 3.205, "learning_rate": 6.577861954568867e-06, "epoch": 1.7375317313765484, "total_flos": 1658454180615045120, "step": 431900 }, { "loss": 3.25375, "learning_rate": 6.5770508291729365e-06, "epoch": 1.737934030920743, "total_flos": 1658836138600734720, "step": 432000 }, { "loss": 3.30875, "learning_rate": 6.5762397037770066e-06, "epoch": 1.7383363304649375, "total_flos": 1659224316051087360, "step": 432100 }, { "loss": 3.30875, "learning_rate": 6.575428578381076e-06, "epoch": 1.738738630009132, "total_flos": 1659610809837649920, "step": 432200 }, { "loss": 3.2575, "learning_rate": 6.574617452985145e-06, "epoch": 1.7391409295533267, "total_flos": 1659984227345817600, "step": 432300 }, { "loss": 3.255, "learning_rate": 6.573806327589214e-06, "epoch": 1.7395432290975215, "total_flos": 1660363492531691520, "step": 432400 }, { "loss": 3.26625, "learning_rate": 6.572995202193284e-06, "epoch": 1.739945528641716, "total_flos": 1660736862238679040, "step": 432500 }, { "loss": 3.305, "learning_rate": 6.572184076797353e-06, "epoch": 1.7403478281859108, "total_flos": 1661127812157480960, "step": 432600 }, { "loss": 3.26875, "learning_rate": 6.571372951401423e-06, "epoch": 1.7407501277301054, "total_flos": 1661512234559569920, "step": 432700 }, { "loss": 3.26, "learning_rate": 6.570561826005492e-06, "epoch": 1.7411524272743, "total_flos": 1661899498476257280, "step": 432800 }, { "loss": 3.25875, "learning_rate": 6.569750700609562e-06, "epoch": 1.7415547268184945, "total_flos": 1662291824006799360, "step": 432900 }, { "loss": 3.31875, "learning_rate": 6.568939575213631e-06, "epoch": 1.7419570263626891, "total_flos": 1662680798143488000, "step": 433000 }, { "loss": 3.29, "learning_rate": 6.5681284498177e-06, "epoch": 1.7423593259068837, "total_flos": 1663045972603699200, "step": 433100 }, { "loss": 3.2775, "learning_rate": 6.567317324421769e-06, "epoch": 1.7427616254510783, "total_flos": 1663428721964482560, "step": 433200 }, { "loss": 3.24125, "learning_rate": 6.5665061990258395e-06, "epoch": 1.7431639249952728, "total_flos": 1663808905995264000, "step": 433300 }, { "loss": 3.225, "learning_rate": 6.565695073629909e-06, "epoch": 1.7435662245394676, "total_flos": 1664197253405368320, "step": 433400 }, { "loss": 3.25625, "learning_rate": 6.564883948233978e-06, "epoch": 1.7439685240836622, "total_flos": 1664569231566888960, "step": 433500 }, { "loss": 3.26, "learning_rate": 6.564072822838047e-06, "epoch": 1.7443708236278568, "total_flos": 1664960144306995200, "step": 433600 }, { "loss": 3.295, "learning_rate": 6.563261697442117e-06, "epoch": 1.7447731231720516, "total_flos": 1665349447740702720, "step": 433700 }, { "loss": 3.26, "learning_rate": 6.562450572046186e-06, "epoch": 1.7451754227162461, "total_flos": 1665732701669498880, "step": 433800 }, { "loss": 3.30625, "learning_rate": 6.5616394466502555e-06, "epoch": 1.7455777222604407, "total_flos": 1666107850642636800, "step": 433900 }, { "loss": 3.26875, "learning_rate": 6.560828321254325e-06, "epoch": 1.7459800218046353, "total_flos": 1666480843251425280, "step": 434000 }, { "loss": 3.2575, "learning_rate": 6.560017195858395e-06, "epoch": 1.7463823213488299, "total_flos": 1666858626600714240, "step": 434100 }, { "loss": 3.23625, "learning_rate": 6.559206070462464e-06, "epoch": 1.7467846208930244, "total_flos": 1667242092979200000, "step": 434200 }, { "loss": 3.295, "learning_rate": 6.558394945066533e-06, "epoch": 1.747186920437219, "total_flos": 1667632023139491840, "step": 434300 }, { "loss": 3.25625, "learning_rate": 6.557583819670602e-06, "epoch": 1.7475892199814138, "total_flos": 1668004585537658880, "step": 434400 }, { "loss": 3.27375, "learning_rate": 6.556772694274672e-06, "epoch": 1.7479915195256084, "total_flos": 1668369436012093440, "step": 434500 }, { "loss": 3.21375, "learning_rate": 6.5559615688787415e-06, "epoch": 1.748393819069803, "total_flos": 1668745137354424320, "step": 434600 }, { "loss": 3.30625, "learning_rate": 6.555150443482811e-06, "epoch": 1.7487961186139978, "total_flos": 1669129936854712320, "step": 434700 }, { "loss": 3.2625, "learning_rate": 6.55433931808688e-06, "epoch": 1.7491984181581923, "total_flos": 1669495626505420800, "step": 434800 }, { "loss": 3.30125, "learning_rate": 6.55352819269095e-06, "epoch": 1.749600717702387, "total_flos": 1669857226499604480, "step": 434900 }, { "loss": 3.24625, "learning_rate": 6.552717067295019e-06, "epoch": 1.7500030172465815, "total_flos": 1670242557124116480, "step": 435000 }, { "loss": 3.25125, "learning_rate": 6.551905941899088e-06, "epoch": 1.750405316790776, "total_flos": 1670628456051548160, "step": 435100 }, { "loss": 3.32625, "learning_rate": 6.5510948165031576e-06, "epoch": 1.7508076163349706, "total_flos": 1671005787945246720, "step": 435200 }, { "loss": 3.27375, "learning_rate": 6.550283691107228e-06, "epoch": 1.7512099158791652, "total_flos": 1671403291936972800, "step": 435300 }, { "loss": 3.2575, "learning_rate": 6.549472565711297e-06, "epoch": 1.7516122154233598, "total_flos": 1671786763626700800, "step": 435400 }, { "loss": 3.2775, "learning_rate": 6.548661440315366e-06, "epoch": 1.7520145149675546, "total_flos": 1672163766223380480, "step": 435500 }, { "loss": 3.2225, "learning_rate": 6.547850314919435e-06, "epoch": 1.7524168145117491, "total_flos": 1672526396598558720, "step": 435600 }, { "loss": 3.22, "learning_rate": 6.547039189523505e-06, "epoch": 1.752819114055944, "total_flos": 1672894545354424320, "step": 435700 }, { "loss": 3.23, "learning_rate": 6.5462280641275744e-06, "epoch": 1.7532214136001385, "total_flos": 1673272817337999360, "step": 435800 }, { "loss": 3.25875, "learning_rate": 6.545416938731644e-06, "epoch": 1.753623713144333, "total_flos": 1673657526547169280, "step": 435900 }, { "loss": 3.30125, "learning_rate": 6.544605813335713e-06, "epoch": 1.7540260126885276, "total_flos": 1674049055391375360, "step": 436000 }, { "loss": 3.28, "learning_rate": 6.543794687939783e-06, "epoch": 1.7544283122327222, "total_flos": 1674429425315635200, "step": 436100 }, { "loss": 3.2625, "learning_rate": 6.542983562543852e-06, "epoch": 1.7548306117769168, "total_flos": 1674806921857843200, "step": 436200 }, { "loss": 3.27, "learning_rate": 6.542172437147921e-06, "epoch": 1.7552329113211114, "total_flos": 1675179781685575680, "step": 436300 }, { "loss": 3.25625, "learning_rate": 6.5413613117519905e-06, "epoch": 1.755635210865306, "total_flos": 1675568219386798080, "step": 436400 }, { "loss": 3.26, "learning_rate": 6.5405501863560605e-06, "epoch": 1.7560375104095007, "total_flos": 1675944674925527040, "step": 436500 }, { "loss": 3.27125, "learning_rate": 6.53973906096013e-06, "epoch": 1.7564398099536953, "total_flos": 1676343883826012160, "step": 436600 }, { "loss": 3.24375, "learning_rate": 6.538927935564199e-06, "epoch": 1.75684210949789, "total_flos": 1676725910857850880, "step": 436700 }, { "loss": 3.20625, "learning_rate": 6.538116810168269e-06, "epoch": 1.7572444090420847, "total_flos": 1677102950633226240, "step": 436800 }, { "loss": 3.31875, "learning_rate": 6.537305684772338e-06, "epoch": 1.7576467085862792, "total_flos": 1677478274877358080, "step": 436900 }, { "loss": 3.29, "learning_rate": 6.536494559376407e-06, "epoch": 1.7580490081304738, "total_flos": 1677858363305779200, "step": 437000 }, { "loss": 3.26375, "learning_rate": 6.5356834339804765e-06, "epoch": 1.7584513076746684, "total_flos": 1678230368023511040, "step": 437100 }, { "loss": 3.28625, "learning_rate": 6.5348723085845466e-06, "epoch": 1.758853607218863, "total_flos": 1678620016687964160, "step": 437200 }, { "loss": 3.27875, "learning_rate": 6.534061183188616e-06, "epoch": 1.7592559067630575, "total_flos": 1678987682120785920, "step": 437300 }, { "loss": 3.195, "learning_rate": 6.533250057792685e-06, "epoch": 1.7596582063072521, "total_flos": 1679378100915363840, "step": 437400 }, { "loss": 3.2575, "learning_rate": 6.532438932396754e-06, "epoch": 1.760060505851447, "total_flos": 1679749346125455360, "step": 437500 }, { "loss": 3.22375, "learning_rate": 6.531627807000824e-06, "epoch": 1.7604628053956415, "total_flos": 1680127596864061440, "step": 437600 }, { "loss": 3.27625, "learning_rate": 6.530816681604893e-06, "epoch": 1.760865104939836, "total_flos": 1680516124856401920, "step": 437700 }, { "loss": 3.33125, "learning_rate": 6.530005556208963e-06, "epoch": 1.7612674044840309, "total_flos": 1680908997444894720, "step": 437800 }, { "loss": 3.32875, "learning_rate": 6.529194430813032e-06, "epoch": 1.7616697040282254, "total_flos": 1681285139620331520, "step": 437900 }, { "loss": 3.23, "learning_rate": 6.528383305417102e-06, "epoch": 1.76207200357242, "total_flos": 1681673348938137600, "step": 438000 }, { "loss": 3.2525, "learning_rate": 6.527572180021171e-06, "epoch": 1.7624743031166146, "total_flos": 1682055503439790080, "step": 438100 }, { "loss": 3.305, "learning_rate": 6.52676105462524e-06, "epoch": 1.7628766026608091, "total_flos": 1682441986603868160, "step": 438200 }, { "loss": 3.22125, "learning_rate": 6.525949929229309e-06, "epoch": 1.7632789022050037, "total_flos": 1682807065461719040, "step": 438300 }, { "loss": 3.23, "learning_rate": 6.5251388038333795e-06, "epoch": 1.7636812017491983, "total_flos": 1683182697757900800, "step": 438400 }, { "loss": 3.3225, "learning_rate": 6.524327678437449e-06, "epoch": 1.764083501293393, "total_flos": 1683551128009605120, "step": 438500 }, { "loss": 3.2825, "learning_rate": 6.523516553041518e-06, "epoch": 1.7644858008375877, "total_flos": 1683946454392012800, "step": 438600 }, { "loss": 3.1925, "learning_rate": 6.522705427645587e-06, "epoch": 1.7648881003817822, "total_flos": 1684325315923476480, "step": 438700 }, { "loss": 3.265, "learning_rate": 6.521894302249657e-06, "epoch": 1.765290399925977, "total_flos": 1684699243310899200, "step": 438800 }, { "loss": 3.285, "learning_rate": 6.521083176853726e-06, "epoch": 1.7656926994701716, "total_flos": 1685076872634163200, "step": 438900 }, { "loss": 3.22875, "learning_rate": 6.5202720514577955e-06, "epoch": 1.7660949990143662, "total_flos": 1685446046459781120, "step": 439000 }, { "loss": 3.28625, "learning_rate": 6.519460926061865e-06, "epoch": 1.7664972985585607, "total_flos": 1685826203934351360, "step": 439100 }, { "loss": 3.2075, "learning_rate": 6.518649800665935e-06, "epoch": 1.7668995981027553, "total_flos": 1686209569399234560, "step": 439200 }, { "loss": 3.2175, "learning_rate": 6.517838675270004e-06, "epoch": 1.76730189764695, "total_flos": 1686593965245112320, "step": 439300 }, { "loss": 3.255, "learning_rate": 6.517027549874073e-06, "epoch": 1.7677041971911445, "total_flos": 1686980305005649920, "step": 439400 }, { "loss": 3.31375, "learning_rate": 6.516216424478142e-06, "epoch": 1.768106496735339, "total_flos": 1687370251099668480, "step": 439500 }, { "loss": 3.32, "learning_rate": 6.515405299082212e-06, "epoch": 1.7685087962795338, "total_flos": 1687765742130585600, "step": 439600 }, { "loss": 3.235, "learning_rate": 6.5145941736862815e-06, "epoch": 1.7689110958237284, "total_flos": 1688150095486525440, "step": 439700 }, { "loss": 3.225, "learning_rate": 6.513783048290351e-06, "epoch": 1.7693133953679232, "total_flos": 1688557738639687680, "step": 439800 }, { "loss": 3.25625, "learning_rate": 6.51297192289442e-06, "epoch": 1.7697156949121178, "total_flos": 1688960176775454720, "step": 439900 }, { "loss": 3.3175, "learning_rate": 6.51216079749849e-06, "epoch": 1.7701179944563123, "total_flos": 1689341460233379840, "step": 440000 }, { "loss": 3.245, "learning_rate": 6.511349672102559e-06, "epoch": 1.770520294000507, "total_flos": 1689719137357824000, "step": 440100 }, { "loss": 3.295, "learning_rate": 6.510538546706628e-06, "epoch": 1.7709225935447015, "total_flos": 1690119891829800960, "step": 440200 }, { "loss": 3.2375, "learning_rate": 6.5097274213106976e-06, "epoch": 1.771324893088896, "total_flos": 1690500256442818560, "step": 440300 }, { "loss": 3.27875, "learning_rate": 6.508916295914768e-06, "epoch": 1.7717271926330906, "total_flos": 1690885682669690880, "step": 440400 }, { "loss": 3.29875, "learning_rate": 6.508105170518837e-06, "epoch": 1.7721294921772852, "total_flos": 1691278996091289600, "step": 440500 }, { "loss": 3.29625, "learning_rate": 6.507294045122906e-06, "epoch": 1.77253179172148, "total_flos": 1691676064561152000, "step": 440600 }, { "loss": 3.26625, "learning_rate": 6.506482919726975e-06, "epoch": 1.7729340912656746, "total_flos": 1692065782271754240, "step": 440700 }, { "loss": 3.2625, "learning_rate": 6.505671794331045e-06, "epoch": 1.7733363908098694, "total_flos": 1692467678660812800, "step": 440800 }, { "loss": 3.2625, "learning_rate": 6.5048606689351144e-06, "epoch": 1.773738690354064, "total_flos": 1692855579926568960, "step": 440900 }, { "loss": 3.22875, "learning_rate": 6.504049543539184e-06, "epoch": 1.7741409898982585, "total_flos": 1693246312084439040, "step": 441000 }, { "loss": 3.23125, "learning_rate": 6.503238418143253e-06, "epoch": 1.774543289442453, "total_flos": 1693626851968450560, "step": 441100 }, { "loss": 3.25625, "learning_rate": 6.502427292747323e-06, "epoch": 1.7749455889866477, "total_flos": 1694018566706135040, "step": 441200 }, { "loss": 3.21, "learning_rate": 6.501616167351392e-06, "epoch": 1.7753478885308422, "total_flos": 1694390210259394560, "step": 441300 }, { "loss": 3.24875, "learning_rate": 6.500805041955461e-06, "epoch": 1.7757501880750368, "total_flos": 1694762353069424640, "step": 441400 }, { "loss": 3.25375, "learning_rate": 6.499993916559531e-06, "epoch": 1.7761524876192314, "total_flos": 1695145968162693120, "step": 441500 }, { "loss": 3.25375, "learning_rate": 6.4991827911636005e-06, "epoch": 1.7765547871634262, "total_flos": 1695536769366712320, "step": 441600 }, { "loss": 3.235, "learning_rate": 6.49837166576767e-06, "epoch": 1.7769570867076208, "total_flos": 1695927416544706560, "step": 441700 }, { "loss": 3.2275, "learning_rate": 6.497560540371739e-06, "epoch": 1.7773593862518153, "total_flos": 1696311031637975040, "step": 441800 }, { "loss": 3.2625, "learning_rate": 6.496749414975809e-06, "epoch": 1.7777616857960101, "total_flos": 1696677225856696320, "step": 441900 }, { "loss": 3.27375, "learning_rate": 6.495938289579878e-06, "epoch": 1.7781639853402047, "total_flos": 1697058233130024960, "step": 442000 }, { "loss": 3.28125, "learning_rate": 6.495127164183947e-06, "epoch": 1.7785662848843993, "total_flos": 1697440392942919680, "step": 442100 }, { "loss": 3.27875, "learning_rate": 6.4943160387880165e-06, "epoch": 1.7789685844285938, "total_flos": 1697816960017735680, "step": 442200 }, { "loss": 3.27, "learning_rate": 6.4935049133920866e-06, "epoch": 1.7793708839727884, "total_flos": 1698202237529825280, "step": 442300 }, { "loss": 3.19875, "learning_rate": 6.492693787996156e-06, "epoch": 1.779773183516983, "total_flos": 1698576977537310720, "step": 442400 }, { "loss": 3.24625, "learning_rate": 6.491882662600225e-06, "epoch": 1.7801754830611776, "total_flos": 1698965712668098560, "step": 442500 }, { "loss": 3.2225, "learning_rate": 6.491071537204294e-06, "epoch": 1.7805777826053724, "total_flos": 1699372675982254080, "step": 442600 }, { "loss": 3.24125, "learning_rate": 6.490260411808364e-06, "epoch": 1.780980082149567, "total_flos": 1699757459548815360, "step": 442700 }, { "loss": 3.30875, "learning_rate": 6.489449286412433e-06, "epoch": 1.7813823816937615, "total_flos": 1700136995608043520, "step": 442800 }, { "loss": 3.2525, "learning_rate": 6.488638161016503e-06, "epoch": 1.7817846812379563, "total_flos": 1700519166043422720, "step": 442900 }, { "loss": 3.26375, "learning_rate": 6.487827035620572e-06, "epoch": 1.7821869807821509, "total_flos": 1700899838708490240, "step": 443000 }, { "loss": 3.2125, "learning_rate": 6.487015910224642e-06, "epoch": 1.7825892803263454, "total_flos": 1701276485451939840, "step": 443100 }, { "loss": 3.235, "learning_rate": 6.486204784828711e-06, "epoch": 1.78299157987054, "total_flos": 1701667047650058240, "step": 443200 }, { "loss": 3.2375, "learning_rate": 6.48539365943278e-06, "epoch": 1.7833938794147346, "total_flos": 1702056584778424320, "step": 443300 }, { "loss": 3.2725, "learning_rate": 6.484582534036849e-06, "epoch": 1.7837961789589292, "total_flos": 1702458906066862080, "step": 443400 }, { "loss": 3.26875, "learning_rate": 6.4837714086409195e-06, "epoch": 1.7841984785031237, "total_flos": 1702847439370444800, "step": 443500 }, { "loss": 3.28625, "learning_rate": 6.482960283244989e-06, "epoch": 1.7846007780473183, "total_flos": 1703223092911595520, "step": 443600 }, { "loss": 3.26875, "learning_rate": 6.482149157849058e-06, "epoch": 1.785003077591513, "total_flos": 1703609597320642560, "step": 443700 }, { "loss": 3.28125, "learning_rate": 6.481338032453127e-06, "epoch": 1.7854053771357077, "total_flos": 1703953633036738560, "step": 443800 }, { "loss": 3.28, "learning_rate": 6.480526907057197e-06, "epoch": 1.7858076766799025, "total_flos": 1704333806445035520, "step": 443900 }, { "loss": 3.285, "learning_rate": 6.479715781661266e-06, "epoch": 1.786209976224097, "total_flos": 1704712423659356160, "step": 444000 }, { "loss": 3.22375, "learning_rate": 6.4789046562653355e-06, "epoch": 1.7866122757682916, "total_flos": 1705081724954787840, "step": 444100 }, { "loss": 3.235, "learning_rate": 6.478093530869405e-06, "epoch": 1.7870145753124862, "total_flos": 1705452879873761280, "step": 444200 }, { "loss": 3.255, "learning_rate": 6.477282405473475e-06, "epoch": 1.7874168748566808, "total_flos": 1705828294409011200, "step": 444300 }, { "loss": 3.285, "learning_rate": 6.476471280077544e-06, "epoch": 1.7878191744008753, "total_flos": 1706229691541299200, "step": 444400 }, { "loss": 3.26875, "learning_rate": 6.475660154681613e-06, "epoch": 1.78822147394507, "total_flos": 1706629320029921280, "step": 444500 }, { "loss": 3.23875, "learning_rate": 6.474849029285682e-06, "epoch": 1.7886237734892645, "total_flos": 1707011639180083200, "step": 444600 }, { "loss": 3.25, "learning_rate": 6.474037903889752e-06, "epoch": 1.7890260730334593, "total_flos": 1707409350310256640, "step": 444700 }, { "loss": 3.1925, "learning_rate": 6.4732267784938215e-06, "epoch": 1.7894283725776539, "total_flos": 1707786012987432960, "step": 444800 }, { "loss": 3.20125, "learning_rate": 6.472415653097891e-06, "epoch": 1.7898306721218484, "total_flos": 1708171348923187200, "step": 444900 }, { "loss": 3.28875, "learning_rate": 6.47160452770196e-06, "epoch": 1.7902329716660432, "total_flos": 1708565432474910720, "step": 445000 }, { "loss": 3.22875, "learning_rate": 6.47079340230603e-06, "epoch": 1.7906352712102378, "total_flos": 1708944926044200960, "step": 445100 }, { "loss": 3.22875, "learning_rate": 6.469982276910099e-06, "epoch": 1.7910375707544324, "total_flos": 1709328408356413440, "step": 445200 }, { "loss": 3.275, "learning_rate": 6.469171151514168e-06, "epoch": 1.791439870298627, "total_flos": 1709714471932354560, "step": 445300 }, { "loss": 3.21125, "learning_rate": 6.4683600261182376e-06, "epoch": 1.7918421698428215, "total_flos": 1710105591810908160, "step": 445400 }, { "loss": 3.2225, "learning_rate": 6.467548900722308e-06, "epoch": 1.792244469387016, "total_flos": 1710485404054732800, "step": 445500 }, { "loss": 3.27875, "learning_rate": 6.466737775326377e-06, "epoch": 1.7926467689312107, "total_flos": 1710882313187328000, "step": 445600 }, { "loss": 3.2825, "learning_rate": 6.465926649930446e-06, "epoch": 1.7930490684754055, "total_flos": 1711263097388482560, "step": 445700 }, { "loss": 3.26375, "learning_rate": 6.465115524534515e-06, "epoch": 1.7934513680196, "total_flos": 1711656963179274240, "step": 445800 }, { "loss": 3.285, "learning_rate": 6.464304399138585e-06, "epoch": 1.7938536675637946, "total_flos": 1712055056718888960, "step": 445900 }, { "loss": 3.26625, "learning_rate": 6.4634932737426544e-06, "epoch": 1.7942559671079894, "total_flos": 1712441757643898880, "step": 446000 }, { "loss": 3.245, "learning_rate": 6.462682148346724e-06, "epoch": 1.794658266652184, "total_flos": 1712830976097730560, "step": 446100 }, { "loss": 3.26625, "learning_rate": 6.4618710229507945e-06, "epoch": 1.7950605661963785, "total_flos": 1713231619033620480, "step": 446200 }, { "loss": 3.26375, "learning_rate": 6.461059897554863e-06, "epoch": 1.7954628657405731, "total_flos": 1713594371567370240, "step": 446300 }, { "loss": 3.23, "learning_rate": 6.460248772158932e-06, "epoch": 1.7958651652847677, "total_flos": 1713978347825111040, "step": 446400 }, { "loss": 3.24875, "learning_rate": 6.459437646763001e-06, "epoch": 1.7962674648289623, "total_flos": 1714356895993282560, "step": 446500 }, { "loss": 3.27, "learning_rate": 6.458626521367072e-06, "epoch": 1.7966697643731568, "total_flos": 1714744526385684480, "step": 446600 }, { "loss": 3.2475, "learning_rate": 6.457815395971141e-06, "epoch": 1.7970720639173516, "total_flos": 1715125448679137280, "step": 446700 }, { "loss": 3.29625, "learning_rate": 6.45700427057521e-06, "epoch": 1.7974743634615462, "total_flos": 1715517041258250240, "step": 446800 }, { "loss": 3.2275, "learning_rate": 6.456193145179279e-06, "epoch": 1.7978766630057408, "total_flos": 1715900661662760960, "step": 446900 }, { "loss": 3.2275, "learning_rate": 6.45538201978335e-06, "epoch": 1.7982789625499356, "total_flos": 1716271620065771520, "step": 447000 }, { "loss": 3.1975, "learning_rate": 6.454570894387419e-06, "epoch": 1.7986812620941302, "total_flos": 1716673771394457600, "step": 447100 }, { "loss": 3.2625, "learning_rate": 6.453759768991487e-06, "epoch": 1.7990835616383247, "total_flos": 1717054677754183680, "step": 447200 }, { "loss": 3.2375, "learning_rate": 6.4529486435955565e-06, "epoch": 1.7994858611825193, "total_flos": 1717467361276231680, "step": 447300 }, { "loss": 3.2525, "learning_rate": 6.452137518199627e-06, "epoch": 1.7998881607267139, "total_flos": 1717831043277373440, "step": 447400 }, { "loss": 3.2475, "learning_rate": 6.451326392803697e-06, "epoch": 1.8002904602709084, "total_flos": 1718229142128230400, "step": 447500 }, { "loss": 3.2175, "learning_rate": 6.450515267407765e-06, "epoch": 1.800692759815103, "total_flos": 1718604328280064000, "step": 447600 }, { "loss": 3.2625, "learning_rate": 6.449704142011834e-06, "epoch": 1.8010950593592976, "total_flos": 1718985988836188160, "step": 447700 }, { "loss": 3.20375, "learning_rate": 6.448893016615905e-06, "epoch": 1.8014973589034924, "total_flos": 1719372105524551680, "step": 447800 }, { "loss": 3.225, "learning_rate": 6.448081891219974e-06, "epoch": 1.801899658447687, "total_flos": 1719737858910167040, "step": 447900 }, { "loss": 3.2225, "learning_rate": 6.447270765824043e-06, "epoch": 1.8023019579918818, "total_flos": 1720109433417277440, "step": 448000 }, { "loss": 3.22, "learning_rate": 6.446459640428112e-06, "epoch": 1.8027042575360763, "total_flos": 1720479170234572800, "step": 448100 }, { "loss": 3.25, "learning_rate": 6.445648515032183e-06, "epoch": 1.803106557080271, "total_flos": 1720867560134615040, "step": 448200 }, { "loss": 3.2875, "learning_rate": 6.444837389636252e-06, "epoch": 1.8035088566244655, "total_flos": 1721264495823421440, "step": 448300 }, { "loss": 3.21, "learning_rate": 6.44402626424032e-06, "epoch": 1.80391115616866, "total_flos": 1721630966226739200, "step": 448400 }, { "loss": 3.215, "learning_rate": 6.443215138844389e-06, "epoch": 1.8043134557128546, "total_flos": 1722014374181560320, "step": 448500 }, { "loss": 3.245, "learning_rate": 6.44240401344846e-06, "epoch": 1.8047157552570492, "total_flos": 1722388747713331200, "step": 448600 }, { "loss": 3.265, "learning_rate": 6.4415928880525295e-06, "epoch": 1.8051180548012438, "total_flos": 1722756997382799360, "step": 448700 }, { "loss": 3.24625, "learning_rate": 6.440781762656598e-06, "epoch": 1.8055203543454386, "total_flos": 1723149992129863680, "step": 448800 }, { "loss": 3.24375, "learning_rate": 6.439970637260667e-06, "epoch": 1.8059226538896331, "total_flos": 1723554315756625920, "step": 448900 }, { "loss": 3.2625, "learning_rate": 6.439159511864738e-06, "epoch": 1.8063249534338277, "total_flos": 1723937564374179840, "step": 449000 }, { "loss": 3.255, "learning_rate": 6.438348386468807e-06, "epoch": 1.8067272529780225, "total_flos": 1724322916243660800, "step": 449100 }, { "loss": 3.21125, "learning_rate": 6.437537261072876e-06, "epoch": 1.807129552522217, "total_flos": 1724696678982574080, "step": 449200 }, { "loss": 3.22125, "learning_rate": 6.436726135676945e-06, "epoch": 1.8075318520664116, "total_flos": 1725068715567759360, "step": 449300 }, { "loss": 3.23125, "learning_rate": 6.4359150102810156e-06, "epoch": 1.8079341516106062, "total_flos": 1725451013472952320, "step": 449400 }, { "loss": 3.205, "learning_rate": 6.435103884885085e-06, "epoch": 1.8083364511548008, "total_flos": 1725839323704360960, "step": 449500 }, { "loss": 3.25625, "learning_rate": 6.434292759489154e-06, "epoch": 1.8087387506989954, "total_flos": 1726226353926389760, "step": 449600 }, { "loss": 3.275, "learning_rate": 6.433481634093222e-06, "epoch": 1.80914105024319, "total_flos": 1726630767844270080, "step": 449700 }, { "loss": 3.2475, "learning_rate": 6.432670508697293e-06, "epoch": 1.8095433497873847, "total_flos": 1727023109308538880, "step": 449800 }, { "loss": 3.21625, "learning_rate": 6.431859383301362e-06, "epoch": 1.8099456493315793, "total_flos": 1727430986156359680, "step": 449900 }, { "loss": 3.22875, "learning_rate": 6.431048257905432e-06, "epoch": 1.8103479488757739, "total_flos": 1727816061841244160, "step": 450000 }, { "loss": 3.26, "learning_rate": 6.4302371325095e-06, "epoch": 1.8107502484199687, "total_flos": 1728195581966745600, "step": 450100 }, { "loss": 3.1725, "learning_rate": 6.429426007113571e-06, "epoch": 1.8111525479641633, "total_flos": 1728583722238402560, "step": 450200 }, { "loss": 3.235, "learning_rate": 6.42861488171764e-06, "epoch": 1.8115548475083578, "total_flos": 1728962461611294720, "step": 450300 }, { "loss": 3.255, "learning_rate": 6.427803756321709e-06, "epoch": 1.8119571470525524, "total_flos": 1729349231582453760, "step": 450400 }, { "loss": 3.2525, "learning_rate": 6.4269926309257776e-06, "epoch": 1.812359446596747, "total_flos": 1729730876204851200, "step": 450500 }, { "loss": 3.23625, "learning_rate": 6.4261815055298485e-06, "epoch": 1.8127617461409415, "total_flos": 1730099375502704640, "step": 450600 }, { "loss": 3.27, "learning_rate": 6.425370380133918e-06, "epoch": 1.8131640456851361, "total_flos": 1730487377682063360, "step": 450700 }, { "loss": 3.26, "learning_rate": 6.424559254737987e-06, "epoch": 1.8135663452293307, "total_flos": 1730868576160112640, "step": 450800 }, { "loss": 3.26125, "learning_rate": 6.423748129342057e-06, "epoch": 1.8139686447735255, "total_flos": 1731254947788103680, "step": 450900 }, { "loss": 3.25625, "learning_rate": 6.422937003946126e-06, "epoch": 1.81437094431772, "total_flos": 1731629491279626240, "step": 451000 }, { "loss": 3.25125, "learning_rate": 6.422125878550195e-06, "epoch": 1.8147732438619149, "total_flos": 1732001942141706240, "step": 451100 }, { "loss": 3.2275, "learning_rate": 6.4213147531542645e-06, "epoch": 1.8151755434061094, "total_flos": 1732381823431680000, "step": 451200 }, { "loss": 3.27625, "learning_rate": 6.4205036277583345e-06, "epoch": 1.815577842950304, "total_flos": 1732778328909864960, "step": 451300 }, { "loss": 3.2325, "learning_rate": 6.419692502362404e-06, "epoch": 1.8159801424944986, "total_flos": 1733169528457052160, "step": 451400 }, { "loss": 3.205, "learning_rate": 6.418881376966473e-06, "epoch": 1.8163824420386931, "total_flos": 1733547279938887680, "step": 451500 }, { "loss": 3.2025, "learning_rate": 6.418070251570542e-06, "epoch": 1.8167847415828877, "total_flos": 1733934565100544000, "step": 451600 }, { "loss": 3.225, "learning_rate": 6.417259126174612e-06, "epoch": 1.8171870411270823, "total_flos": 1734329477206056960, "step": 451700 }, { "loss": 3.24875, "learning_rate": 6.416448000778681e-06, "epoch": 1.8175893406712769, "total_flos": 1734704265014722560, "step": 451800 }, { "loss": 3.25125, "learning_rate": 6.4156368753827505e-06, "epoch": 1.8179916402154717, "total_flos": 1735069051754250240, "step": 451900 }, { "loss": 3.23875, "learning_rate": 6.41482574998682e-06, "epoch": 1.8183939397596662, "total_flos": 1735439760528875520, "step": 452000 }, { "loss": 3.21125, "learning_rate": 6.41401462459089e-06, "epoch": 1.818796239303861, "total_flos": 1735824777790095360, "step": 452100 }, { "loss": 3.18, "learning_rate": 6.413203499194959e-06, "epoch": 1.8191985388480556, "total_flos": 1736186638035148800, "step": 452200 }, { "loss": 3.23125, "learning_rate": 6.412392373799028e-06, "epoch": 1.8196008383922502, "total_flos": 1736599066617569280, "step": 452300 }, { "loss": 3.22875, "learning_rate": 6.411581248403097e-06, "epoch": 1.8200031379364447, "total_flos": 1736974752026173440, "step": 452400 }, { "loss": 3.24875, "learning_rate": 6.410770123007167e-06, "epoch": 1.8204054374806393, "total_flos": 1737357549188136960, "step": 452500 }, { "loss": 3.21875, "learning_rate": 6.409958997611237e-06, "epoch": 1.820807737024834, "total_flos": 1737732730028728320, "step": 452600 }, { "loss": 3.195, "learning_rate": 6.409147872215306e-06, "epoch": 1.8212100365690285, "total_flos": 1738116430101872640, "step": 452700 }, { "loss": 3.245, "learning_rate": 6.408336746819375e-06, "epoch": 1.821612336113223, "total_flos": 1738505791959244800, "step": 452800 }, { "loss": 3.19875, "learning_rate": 6.407525621423445e-06, "epoch": 1.8220146356574178, "total_flos": 1738877308042690560, "step": 452900 }, { "loss": 3.185, "learning_rate": 6.406714496027514e-06, "epoch": 1.8224169352016124, "total_flos": 1739256796300738560, "step": 453000 }, { "loss": 3.2025, "learning_rate": 6.4059033706315834e-06, "epoch": 1.822819234745807, "total_flos": 1739630532483440640, "step": 453100 }, { "loss": 3.30125, "learning_rate": 6.405092245235653e-06, "epoch": 1.8232215342900018, "total_flos": 1740019819983421440, "step": 453200 }, { "loss": 3.24125, "learning_rate": 6.404281119839723e-06, "epoch": 1.8236238338341964, "total_flos": 1740433810071060480, "step": 453300 }, { "loss": 3.2225, "learning_rate": 6.403469994443792e-06, "epoch": 1.824026133378391, "total_flos": 1740818837954764800, "step": 453400 }, { "loss": 3.265, "learning_rate": 6.402658869047861e-06, "epoch": 1.8244284329225855, "total_flos": 1741205342363811840, "step": 453500 }, { "loss": 3.26, "learning_rate": 6.40184774365193e-06, "epoch": 1.82483073246678, "total_flos": 1741593020557393920, "step": 453600 }, { "loss": 3.28875, "learning_rate": 6.401036618256e-06, "epoch": 1.8252330320109746, "total_flos": 1741969518586060800, "step": 453700 }, { "loss": 3.2525, "learning_rate": 6.4002254928600695e-06, "epoch": 1.8256353315551692, "total_flos": 1742337630163230720, "step": 453800 }, { "loss": 3.18, "learning_rate": 6.399414367464139e-06, "epoch": 1.826037631099364, "total_flos": 1742710808665497600, "step": 453900 }, { "loss": 3.2475, "learning_rate": 6.398603242068208e-06, "epoch": 1.8264399306435586, "total_flos": 1743091098921123840, "step": 454000 }, { "loss": 3.22875, "learning_rate": 6.397792116672278e-06, "epoch": 1.8268422301877532, "total_flos": 1743466619681218560, "step": 454100 }, { "loss": 3.21625, "learning_rate": 6.396980991276347e-06, "epoch": 1.827244529731948, "total_flos": 1743856815403622400, "step": 454200 }, { "loss": 3.17375, "learning_rate": 6.396169865880416e-06, "epoch": 1.8276468292761425, "total_flos": 1744229361868062720, "step": 454300 }, { "loss": 3.23625, "learning_rate": 6.3953587404844855e-06, "epoch": 1.828049128820337, "total_flos": 1744610528478658560, "step": 454400 }, { "loss": 3.235, "learning_rate": 6.3945476150885556e-06, "epoch": 1.8284514283645317, "total_flos": 1745013476493680640, "step": 454500 }, { "loss": 3.2625, "learning_rate": 6.393736489692625e-06, "epoch": 1.8288537279087262, "total_flos": 1745393883596636160, "step": 454600 }, { "loss": 3.22125, "learning_rate": 6.392925364296694e-06, "epoch": 1.8292560274529208, "total_flos": 1745776840095866880, "step": 454700 }, { "loss": 3.23, "learning_rate": 6.392114238900763e-06, "epoch": 1.8296583269971154, "total_flos": 1746177042198650880, "step": 454800 }, { "loss": 3.24125, "learning_rate": 6.391303113504833e-06, "epoch": 1.83006062654131, "total_flos": 1746566982981427200, "step": 454900 }, { "loss": 3.23375, "learning_rate": 6.390491988108902e-06, "epoch": 1.8304629260855048, "total_flos": 1746960572587622400, "step": 455000 }, { "loss": 3.23625, "learning_rate": 6.389680862712972e-06, "epoch": 1.8308652256296993, "total_flos": 1747325396505845760, "step": 455100 }, { "loss": 3.24875, "learning_rate": 6.388869737317041e-06, "epoch": 1.8312675251738941, "total_flos": 1747713074699427840, "step": 455200 }, { "loss": 3.2325, "learning_rate": 6.388058611921111e-06, "epoch": 1.8316698247180887, "total_flos": 1748100922852761600, "step": 455300 }, { "loss": 3.23625, "learning_rate": 6.38724748652518e-06, "epoch": 1.8320721242622833, "total_flos": 1748477001293291520, "step": 455400 }, { "loss": 3.21625, "learning_rate": 6.386436361129249e-06, "epoch": 1.8324744238064778, "total_flos": 1748862300050350080, "step": 455500 }, { "loss": 3.24625, "learning_rate": 6.385625235733318e-06, "epoch": 1.8328767233506724, "total_flos": 1749260170517790720, "step": 455600 }, { "loss": 3.27625, "learning_rate": 6.3848141103373885e-06, "epoch": 1.833279022894867, "total_flos": 1749657716999454720, "step": 455700 }, { "loss": 3.1775, "learning_rate": 6.384002984941458e-06, "epoch": 1.8336813224390616, "total_flos": 1750026832401408000, "step": 455800 }, { "loss": 3.23375, "learning_rate": 6.383191859545527e-06, "epoch": 1.8340836219832561, "total_flos": 1750395937180876800, "step": 455900 }, { "loss": 3.2525, "learning_rate": 6.382380734149597e-06, "epoch": 1.834485921527451, "total_flos": 1750785931076075520, "step": 456000 }, { "loss": 3.26875, "learning_rate": 6.381569608753666e-06, "epoch": 1.8348882210716455, "total_flos": 1751181762026496000, "step": 456100 }, { "loss": 3.19875, "learning_rate": 6.380758483357735e-06, "epoch": 1.8352905206158403, "total_flos": 1751568861294673920, "step": 456200 }, { "loss": 3.20125, "learning_rate": 6.3799473579618045e-06, "epoch": 1.8356928201600349, "total_flos": 1751947797183528960, "step": 456300 }, { "loss": 3.2425, "learning_rate": 6.3791362325658745e-06, "epoch": 1.8360951197042295, "total_flos": 1752332437346549760, "step": 456400 }, { "loss": 3.25125, "learning_rate": 6.378325107169944e-06, "epoch": 1.836497419248424, "total_flos": 1752695774116945920, "step": 456500 }, { "loss": 3.2125, "learning_rate": 6.377513981774013e-06, "epoch": 1.8368997187926186, "total_flos": 1753090898672148480, "step": 456600 }, { "loss": 3.27875, "learning_rate": 6.376702856378082e-06, "epoch": 1.8373020183368132, "total_flos": 1753474906797342720, "step": 456700 }, { "loss": 3.3275, "learning_rate": 6.375891730982152e-06, "epoch": 1.8377043178810077, "total_flos": 1753854416300359680, "step": 456800 }, { "loss": 3.205, "learning_rate": 6.375080605586221e-06, "epoch": 1.8381066174252023, "total_flos": 1754225645576724480, "step": 456900 }, { "loss": 3.2675, "learning_rate": 6.3742694801902905e-06, "epoch": 1.8385089169693971, "total_flos": 1754599424249364480, "step": 457000 }, { "loss": 3.2175, "learning_rate": 6.37345835479436e-06, "epoch": 1.8389112165135917, "total_flos": 1754987383938785280, "step": 457100 }, { "loss": 3.22625, "learning_rate": 6.37264722939843e-06, "epoch": 1.8393135160577863, "total_flos": 1755391670386851840, "step": 457200 }, { "loss": 3.2525, "learning_rate": 6.371836104002499e-06, "epoch": 1.839715815601981, "total_flos": 1755771663212912640, "step": 457300 }, { "loss": 3.22125, "learning_rate": 6.371024978606568e-06, "epoch": 1.8401181151461756, "total_flos": 1756144379637104640, "step": 457400 }, { "loss": 3.26, "learning_rate": 6.370213853210637e-06, "epoch": 1.8405204146903702, "total_flos": 1756543625716285440, "step": 457500 }, { "loss": 3.225, "learning_rate": 6.369402727814707e-06, "epoch": 1.8409227142345648, "total_flos": 1756933932974776320, "step": 457600 }, { "loss": 3.27375, "learning_rate": 6.368591602418777e-06, "epoch": 1.8413250137787593, "total_flos": 1757307578866360320, "step": 457700 }, { "loss": 3.25625, "learning_rate": 6.367780477022846e-06, "epoch": 1.841727313322954, "total_flos": 1757689122575155200, "step": 457800 }, { "loss": 3.29125, "learning_rate": 6.366969351626915e-06, "epoch": 1.8421296128671485, "total_flos": 1758078510988738560, "step": 457900 }, { "loss": 3.24125, "learning_rate": 6.366158226230985e-06, "epoch": 1.8425319124113433, "total_flos": 1758441948672737280, "step": 458000 }, { "loss": 3.24125, "learning_rate": 6.365347100835054e-06, "epoch": 1.8429342119555379, "total_flos": 1758832988882657280, "step": 458100 }, { "loss": 3.25125, "learning_rate": 6.3645359754391234e-06, "epoch": 1.8433365114997324, "total_flos": 1759221925840650240, "step": 458200 }, { "loss": 3.25, "learning_rate": 6.363724850043193e-06, "epoch": 1.8437388110439272, "total_flos": 1759624024056913920, "step": 458300 }, { "loss": 3.21125, "learning_rate": 6.362913724647263e-06, "epoch": 1.8441411105881218, "total_flos": 1760009970785525760, "step": 458400 }, { "loss": 3.26125, "learning_rate": 6.362102599251332e-06, "epoch": 1.8445434101323164, "total_flos": 1760387701022392320, "step": 458500 }, { "loss": 3.1925, "learning_rate": 6.361291473855401e-06, "epoch": 1.844945709676511, "total_flos": 1760772946667028480, "step": 458600 }, { "loss": 3.24375, "learning_rate": 6.36048034845947e-06, "epoch": 1.8453480092207055, "total_flos": 1761152854513213440, "step": 458700 }, { "loss": 3.17875, "learning_rate": 6.35966922306354e-06, "epoch": 1.8457503087649, "total_flos": 1761529368475607040, "step": 458800 }, { "loss": 3.285, "learning_rate": 6.3588580976676095e-06, "epoch": 1.8461526083090947, "total_flos": 1761911687625768960, "step": 458900 }, { "loss": 3.2, "learning_rate": 6.358046972271679e-06, "epoch": 1.8465549078532892, "total_flos": 1762288334369218560, "step": 459000 }, { "loss": 3.1675, "learning_rate": 6.357235846875748e-06, "epoch": 1.846957207397484, "total_flos": 1762664476544655360, "step": 459100 }, { "loss": 3.2125, "learning_rate": 6.356424721479818e-06, "epoch": 1.8473595069416786, "total_flos": 1763041744703447040, "step": 459200 }, { "loss": 3.235, "learning_rate": 6.355613596083887e-06, "epoch": 1.8477618064858734, "total_flos": 1763430336430694400, "step": 459300 }, { "loss": 3.22375, "learning_rate": 6.354802470687956e-06, "epoch": 1.848164106030068, "total_flos": 1763813468200919040, "step": 459400 }, { "loss": 3.2725, "learning_rate": 6.3539913452920255e-06, "epoch": 1.8485664055742626, "total_flos": 1764194236468346880, "step": 459500 }, { "loss": 3.255, "learning_rate": 6.3531802198960956e-06, "epoch": 1.8489687051184571, "total_flos": 1764581420716400640, "step": 459600 }, { "loss": 3.1975, "learning_rate": 6.352369094500165e-06, "epoch": 1.8493710046626517, "total_flos": 1764971095937064960, "step": 459700 }, { "loss": 3.19375, "learning_rate": 6.351557969104234e-06, "epoch": 1.8497733042068463, "total_flos": 1765353760317972480, "step": 459800 }, { "loss": 3.2175, "learning_rate": 6.350746843708303e-06, "epoch": 1.8501756037510408, "total_flos": 1765721383260856320, "step": 459900 }, { "loss": 3.18, "learning_rate": 6.349935718312373e-06, "epoch": 1.8505779032952354, "total_flos": 1766101546046668800, "step": 460000 }, { "loss": 3.1675, "learning_rate": 6.349124592916442e-06, "epoch": 1.8509802028394302, "total_flos": 1766485979071242240, "step": 460100 }, { "loss": 3.21125, "learning_rate": 6.348313467520512e-06, "epoch": 1.8513825023836248, "total_flos": 1766869503873392640, "step": 460200 }, { "loss": 3.22875, "learning_rate": 6.347502342124581e-06, "epoch": 1.8517848019278196, "total_flos": 1767249114290012160, "step": 460300 }, { "loss": 3.20375, "learning_rate": 6.346691216728651e-06, "epoch": 1.8521871014720142, "total_flos": 1767619270695444480, "step": 460400 }, { "loss": 3.22625, "learning_rate": 6.34588009133272e-06, "epoch": 1.8525894010162087, "total_flos": 1768028740915937280, "step": 460500 }, { "loss": 3.18625, "learning_rate": 6.345068965936789e-06, "epoch": 1.8529917005604033, "total_flos": 1768390165639127040, "step": 460600 }, { "loss": 3.19875, "learning_rate": 6.344257840540859e-06, "epoch": 1.8533940001045979, "total_flos": 1768777482668236800, "step": 460700 }, { "loss": 3.16, "learning_rate": 6.3434467151449285e-06, "epoch": 1.8537962996487924, "total_flos": 1769160789709455360, "step": 460800 }, { "loss": 3.28375, "learning_rate": 6.342635589748998e-06, "epoch": 1.854198599192987, "total_flos": 1769568406306406400, "step": 460900 }, { "loss": 3.18375, "learning_rate": 6.341824464353067e-06, "epoch": 1.8546008987371816, "total_flos": 1769943066645258240, "step": 461000 }, { "loss": 3.26875, "learning_rate": 6.341013338957137e-06, "epoch": 1.8550031982813764, "total_flos": 1770329512630640640, "step": 461100 }, { "loss": 3.25875, "learning_rate": 6.340202213561206e-06, "epoch": 1.855405497825571, "total_flos": 1770716935884595200, "step": 461200 }, { "loss": 3.23, "learning_rate": 6.339391088165275e-06, "epoch": 1.8558077973697655, "total_flos": 1771107641486254080, "step": 461300 }, { "loss": 3.22125, "learning_rate": 6.3385799627693445e-06, "epoch": 1.8562100969139603, "total_flos": 1771483661503119360, "step": 461400 }, { "loss": 3.2775, "learning_rate": 6.3377688373734145e-06, "epoch": 1.856612396458155, "total_flos": 1771871801774776320, "step": 461500 }, { "loss": 3.21875, "learning_rate": 6.336957711977484e-06, "epoch": 1.8570146960023495, "total_flos": 1772245357375242240, "step": 461600 }, { "loss": 3.17125, "learning_rate": 6.336146586581553e-06, "epoch": 1.857416995546544, "total_flos": 1772643928926658560, "step": 461700 }, { "loss": 3.1925, "learning_rate": 6.335335461185622e-06, "epoch": 1.8578192950907386, "total_flos": 1773035999517573120, "step": 461800 }, { "loss": 3.17, "learning_rate": 6.334524335789692e-06, "epoch": 1.8582215946349332, "total_flos": 1773433848740044800, "step": 461900 }, { "loss": 3.25375, "learning_rate": 6.333713210393761e-06, "epoch": 1.8586238941791278, "total_flos": 1773815875771883520, "step": 462000 }, { "loss": 3.18875, "learning_rate": 6.3329020849978305e-06, "epoch": 1.8590261937233226, "total_flos": 1774197100806144000, "step": 462100 }, { "loss": 3.23625, "learning_rate": 6.3320909596019e-06, "epoch": 1.8594284932675171, "total_flos": 1774583753929973760, "step": 462200 }, { "loss": 3.28125, "learning_rate": 6.33127983420597e-06, "epoch": 1.8598307928117117, "total_flos": 1774973949652377600, "step": 462300 }, { "loss": 3.20625, "learning_rate": 6.330468708810039e-06, "epoch": 1.8602330923559065, "total_flos": 1775345577271910400, "step": 462400 }, { "loss": 3.15625, "learning_rate": 6.329657583414108e-06, "epoch": 1.860635391900101, "total_flos": 1775745689083576320, "step": 462500 }, { "loss": 3.19625, "learning_rate": 6.328846458018177e-06, "epoch": 1.8610376914442956, "total_flos": 1776140362183188480, "step": 462600 }, { "loss": 3.21, "learning_rate": 6.328035332622247e-06, "epoch": 1.8614399909884902, "total_flos": 1776517109840240640, "step": 462700 }, { "loss": 3.17875, "learning_rate": 6.327224207226317e-06, "epoch": 1.8618422905326848, "total_flos": 1776907199337799680, "step": 462800 }, { "loss": 3.22, "learning_rate": 6.326413081830386e-06, "epoch": 1.8622445900768794, "total_flos": 1777272995213352960, "step": 462900 }, { "loss": 3.235, "learning_rate": 6.325601956434455e-06, "epoch": 1.862646889621074, "total_flos": 1777657003338547200, "step": 463000 }, { "loss": 3.1975, "learning_rate": 6.324790831038525e-06, "epoch": 1.8630491891652685, "total_flos": 1778029140837335040, "step": 463100 }, { "loss": 3.275, "learning_rate": 6.323979705642594e-06, "epoch": 1.8634514887094633, "total_flos": 1778416149814394880, "step": 463200 }, { "loss": 3.1975, "learning_rate": 6.3231685802466634e-06, "epoch": 1.8638537882536579, "total_flos": 1778786911701442560, "step": 463300 }, { "loss": 3.16625, "learning_rate": 6.322357454850733e-06, "epoch": 1.8642560877978527, "total_flos": 1779169528281169920, "step": 463400 }, { "loss": 3.2375, "learning_rate": 6.321546329454803e-06, "epoch": 1.8646583873420473, "total_flos": 1779551178214809600, "step": 463500 }, { "loss": 3.23875, "learning_rate": 6.320735204058872e-06, "epoch": 1.8650606868862418, "total_flos": 1779940699409448960, "step": 463600 }, { "loss": 3.2175, "learning_rate": 6.319924078662941e-06, "epoch": 1.8654629864304364, "total_flos": 1780344741540372480, "step": 463700 }, { "loss": 3.19875, "learning_rate": 6.31911295326701e-06, "epoch": 1.865865285974631, "total_flos": 1780726147156869120, "step": 463800 }, { "loss": 3.23, "learning_rate": 6.31830182787108e-06, "epoch": 1.8662675855188255, "total_flos": 1781112816214425600, "step": 463900 }, { "loss": 3.20875, "learning_rate": 6.3174907024751495e-06, "epoch": 1.8666698850630201, "total_flos": 1781489494825328640, "step": 464000 }, { "loss": 3.23875, "learning_rate": 6.316679577079219e-06, "epoch": 1.8670721846072147, "total_flos": 1781875430931456000, "step": 464100 }, { "loss": 3.2375, "learning_rate": 6.315868451683288e-06, "epoch": 1.8674744841514095, "total_flos": 1782250638328258560, "step": 464200 }, { "loss": 3.22125, "learning_rate": 6.315057326287358e-06, "epoch": 1.867876783695604, "total_flos": 1782623721228165120, "step": 464300 }, { "loss": 3.2325, "learning_rate": 6.314246200891427e-06, "epoch": 1.8682790832397989, "total_flos": 1783015345674731520, "step": 464400 }, { "loss": 3.25375, "learning_rate": 6.313435075495496e-06, "epoch": 1.8686813827839934, "total_flos": 1783394961402593280, "step": 464500 }, { "loss": 3.16625, "learning_rate": 6.3126239500995655e-06, "epoch": 1.869083682328188, "total_flos": 1783795179439104000, "step": 464600 }, { "loss": 3.11125, "learning_rate": 6.3118128247036356e-06, "epoch": 1.8694859818723826, "total_flos": 1784180749069516800, "step": 464700 }, { "loss": 3.24, "learning_rate": 6.311001699307705e-06, "epoch": 1.8698882814165771, "total_flos": 1784564411963965440, "step": 464800 }, { "loss": 3.245, "learning_rate": 6.310190573911774e-06, "epoch": 1.8702905809607717, "total_flos": 1784950103752949760, "step": 464900 }, { "loss": 3.24375, "learning_rate": 6.309379448515843e-06, "epoch": 1.8706928805049663, "total_flos": 1785331854600192000, "step": 465000 }, { "loss": 3.245, "learning_rate": 6.308568323119913e-06, "epoch": 1.8710951800491609, "total_flos": 1785712240458178560, "step": 465100 }, { "loss": 3.24, "learning_rate": 6.307757197723982e-06, "epoch": 1.8714974795933557, "total_flos": 1786096397298155520, "step": 465200 }, { "loss": 3.23875, "learning_rate": 6.306946072328052e-06, "epoch": 1.8718997791375502, "total_flos": 1786472364202598400, "step": 465300 }, { "loss": 3.2075, "learning_rate": 6.306134946932122e-06, "epoch": 1.8723020786817448, "total_flos": 1786845277142753280, "step": 465400 }, { "loss": 3.18875, "learning_rate": 6.305323821536191e-06, "epoch": 1.8727043782259396, "total_flos": 1787223708463595520, "step": 465500 }, { "loss": 3.175, "learning_rate": 6.30451269614026e-06, "epoch": 1.8731066777701342, "total_flos": 1787615879968112640, "step": 465600 }, { "loss": 3.205, "learning_rate": 6.303701570744329e-06, "epoch": 1.8735089773143287, "total_flos": 1788015540324188160, "step": 465700 }, { "loss": 3.20125, "learning_rate": 6.302890445348399e-06, "epoch": 1.8739112768585233, "total_flos": 1788403032624291840, "step": 465800 }, { "loss": 3.2425, "learning_rate": 6.3020793199524685e-06, "epoch": 1.874313576402718, "total_flos": 1788784868451409920, "step": 465900 }, { "loss": 3.2375, "learning_rate": 6.301268194556538e-06, "epoch": 1.8747158759469125, "total_flos": 1789160909713244160, "step": 466000 }, { "loss": 3.155, "learning_rate": 6.300457069160607e-06, "epoch": 1.875118175491107, "total_flos": 1789540392660049920, "step": 466100 }, { "loss": 3.22875, "learning_rate": 6.299645943764677e-06, "epoch": 1.8755204750353018, "total_flos": 1789941550786437120, "step": 466200 }, { "loss": 3.21875, "learning_rate": 6.298834818368746e-06, "epoch": 1.8759227745794964, "total_flos": 1790322064114237440, "step": 466300 }, { "loss": 3.20375, "learning_rate": 6.298023692972815e-06, "epoch": 1.876325074123691, "total_flos": 1790695476311162880, "step": 466400 }, { "loss": 3.1625, "learning_rate": 6.2972125675768845e-06, "epoch": 1.8767273736678858, "total_flos": 1791070524370698240, "step": 466500 }, { "loss": 3.25875, "learning_rate": 6.296401442180955e-06, "epoch": 1.8771296732120804, "total_flos": 1791477115897896960, "step": 466600 }, { "loss": 3.19125, "learning_rate": 6.295590316785024e-06, "epoch": 1.877531972756275, "total_flos": 1791875650270617600, "step": 466700 }, { "loss": 3.23625, "learning_rate": 6.294779191389093e-06, "epoch": 1.8779342723004695, "total_flos": 1792264380090163200, "step": 466800 }, { "loss": 3.2, "learning_rate": 6.293968065993162e-06, "epoch": 1.878336571844664, "total_flos": 1792640161101127680, "step": 466900 }, { "loss": 3.20875, "learning_rate": 6.293156940597233e-06, "epoch": 1.8787388713888586, "total_flos": 1793018666779361280, "step": 467000 }, { "loss": 3.225, "learning_rate": 6.292345815201301e-06, "epoch": 1.8791411709330532, "total_flos": 1793402149091573760, "step": 467100 }, { "loss": 3.1925, "learning_rate": 6.2915346898053705e-06, "epoch": 1.8795434704772478, "total_flos": 1793783644999188480, "step": 467200 }, { "loss": 3.1725, "learning_rate": 6.29072356440944e-06, "epoch": 1.8799457700214426, "total_flos": 1794167812461649920, "step": 467300 }, { "loss": 3.15125, "learning_rate": 6.289912439013511e-06, "epoch": 1.8803480695656372, "total_flos": 1794537400564162560, "step": 467400 }, { "loss": 3.23625, "learning_rate": 6.289101313617579e-06, "epoch": 1.880750369109832, "total_flos": 1794917690819788800, "step": 467500 }, { "loss": 3.20125, "learning_rate": 6.288290188221648e-06, "epoch": 1.8811526686540265, "total_flos": 1795282636896583680, "step": 467600 }, { "loss": 3.225, "learning_rate": 6.287479062825717e-06, "epoch": 1.881554968198221, "total_flos": 1795665699620659200, "step": 467700 }, { "loss": 3.20125, "learning_rate": 6.286667937429788e-06, "epoch": 1.8819572677424157, "total_flos": 1796035728556277760, "step": 467800 }, { "loss": 3.20125, "learning_rate": 6.285856812033857e-06, "epoch": 1.8823595672866102, "total_flos": 1796408328133140480, "step": 467900 }, { "loss": 3.13, "learning_rate": 6.285045686637926e-06, "epoch": 1.8827618668308048, "total_flos": 1796791162473799680, "step": 468000 }, { "loss": 3.1475, "learning_rate": 6.284234561241995e-06, "epoch": 1.8831641663749994, "total_flos": 1797182542603223040, "step": 468100 }, { "loss": 3.2, "learning_rate": 6.283423435846066e-06, "epoch": 1.883566465919194, "total_flos": 1797558259879280640, "step": 468200 }, { "loss": 3.21, "learning_rate": 6.282612310450134e-06, "epoch": 1.8839687654633888, "total_flos": 1797946878162739200, "step": 468300 }, { "loss": 3.175, "learning_rate": 6.2818011850542034e-06, "epoch": 1.8843710650075833, "total_flos": 1798344907967447040, "step": 468400 }, { "loss": 3.22875, "learning_rate": 6.280990059658273e-06, "epoch": 1.884773364551778, "total_flos": 1798728384968417280, "step": 468500 }, { "loss": 3.17375, "learning_rate": 6.2801789342623435e-06, "epoch": 1.8851756640959727, "total_flos": 1799108834561310720, "step": 468600 }, { "loss": 3.18875, "learning_rate": 6.279367808866412e-06, "epoch": 1.8855779636401673, "total_flos": 1799492970156318720, "step": 468700 }, { "loss": 3.24125, "learning_rate": 6.278556683470481e-06, "epoch": 1.8859802631843618, "total_flos": 1799888955132764160, "step": 468800 }, { "loss": 3.24625, "learning_rate": 6.27774555807455e-06, "epoch": 1.8863825627285564, "total_flos": 1800273069482803200, "step": 468900 }, { "loss": 3.1575, "learning_rate": 6.276934432678621e-06, "epoch": 1.886784862272751, "total_flos": 1800656254365450240, "step": 469000 }, { "loss": 3.22375, "learning_rate": 6.2761233072826895e-06, "epoch": 1.8871871618169456, "total_flos": 1801036964209213440, "step": 469100 }, { "loss": 3.20375, "learning_rate": 6.275312181886759e-06, "epoch": 1.8875894613611401, "total_flos": 1801413637508874240, "step": 469200 }, { "loss": 3.1875, "learning_rate": 6.274501056490828e-06, "epoch": 1.887991760905335, "total_flos": 1801793285104189440, "step": 469300 }, { "loss": 3.20375, "learning_rate": 6.273689931094899e-06, "epoch": 1.8883940604495295, "total_flos": 1802174446403543040, "step": 469400 }, { "loss": 3.21375, "learning_rate": 6.272878805698968e-06, "epoch": 1.888796359993724, "total_flos": 1802568089122160640, "step": 469500 }, { "loss": 3.20625, "learning_rate": 6.272067680303036e-06, "epoch": 1.8891986595379189, "total_flos": 1802949404447539200, "step": 469600 }, { "loss": 3.24875, "learning_rate": 6.2712565549071055e-06, "epoch": 1.8896009590821135, "total_flos": 1803344847677276160, "step": 469700 }, { "loss": 3.23375, "learning_rate": 6.270445429511176e-06, "epoch": 1.890003258626308, "total_flos": 1803723634851348480, "step": 469800 }, { "loss": 3.16375, "learning_rate": 6.269634304115246e-06, "epoch": 1.8904055581705026, "total_flos": 1804105922134056960, "step": 469900 }, { "loss": 3.2175, "learning_rate": 6.268823178719314e-06, "epoch": 1.8908078577146972, "total_flos": 1804474681682780160, "step": 470000 }, { "loss": 3.225, "learning_rate": 6.268012053323385e-06, "epoch": 1.8912101572588917, "total_flos": 1804860405339217920, "step": 470100 }, { "loss": 3.2075, "learning_rate": 6.267200927927454e-06, "epoch": 1.8916124568030863, "total_flos": 1805235920788070400, "step": 470200 }, { "loss": 3.19, "learning_rate": 6.266389802531523e-06, "epoch": 1.8920147563472811, "total_flos": 1805600086112256000, "step": 470300 }, { "loss": 3.21625, "learning_rate": 6.265578677135592e-06, "epoch": 1.8924170558914757, "total_flos": 1805968638522531840, "step": 470400 }, { "loss": 3.175, "learning_rate": 6.2647675517396625e-06, "epoch": 1.8928193554356703, "total_flos": 1806351770292756480, "step": 470500 }, { "loss": 3.2025, "learning_rate": 6.263956426343732e-06, "epoch": 1.893221654979865, "total_flos": 1806743931174789120, "step": 470600 }, { "loss": 3.1775, "learning_rate": 6.263145300947801e-06, "epoch": 1.8936239545240596, "total_flos": 1807128385444331520, "step": 470700 }, { "loss": 3.215, "learning_rate": 6.262334175551869e-06, "epoch": 1.8940262540682542, "total_flos": 1807503688443494400, "step": 470800 }, { "loss": 3.175, "learning_rate": 6.26152305015594e-06, "epoch": 1.8944285536124488, "total_flos": 1807871736285757440, "step": 470900 }, { "loss": 3.265, "learning_rate": 6.260711924760009e-06, "epoch": 1.8948308531566433, "total_flos": 1808244250882744320, "step": 471000 }, { "loss": 3.185, "learning_rate": 6.2599007993640785e-06, "epoch": 1.895233152700838, "total_flos": 1808619511391969280, "step": 471100 }, { "loss": 3.275, "learning_rate": 6.259089673968147e-06, "epoch": 1.8956354522450325, "total_flos": 1809002744075796480, "step": 471200 }, { "loss": 3.2125, "learning_rate": 6.258278548572218e-06, "epoch": 1.896037751789227, "total_flos": 1809400168398888960, "step": 471300 }, { "loss": 3.1575, "learning_rate": 6.257467423176287e-06, "epoch": 1.8964400513334219, "total_flos": 1809785668983152640, "step": 471400 }, { "loss": 3.24, "learning_rate": 6.256656297780356e-06, "epoch": 1.8968423508776164, "total_flos": 1810167021487226880, "step": 471500 }, { "loss": 3.235, "learning_rate": 6.2558451723844245e-06, "epoch": 1.8972446504218112, "total_flos": 1810552962904596480, "step": 471600 }, { "loss": 3.25, "learning_rate": 6.255034046988495e-06, "epoch": 1.8976469499660058, "total_flos": 1810957854834278400, "step": 471700 }, { "loss": 3.2225, "learning_rate": 6.2542229215925646e-06, "epoch": 1.8980492495102004, "total_flos": 1811351327593144320, "step": 471800 }, { "loss": 3.235, "learning_rate": 6.253411796196634e-06, "epoch": 1.898451549054395, "total_flos": 1811721260926402560, "step": 471900 }, { "loss": 3.2375, "learning_rate": 6.252600670800703e-06, "epoch": 1.8988538485985895, "total_flos": 1812112826949304320, "step": 472000 }, { "loss": 3.2025, "learning_rate": 6.251789545404773e-06, "epoch": 1.899256148142784, "total_flos": 1812503458193571840, "step": 472100 }, { "loss": 3.195, "learning_rate": 6.250978420008842e-06, "epoch": 1.8996584476869787, "total_flos": 1812875096435589120, "step": 472200 }, { "loss": 3.2425, "learning_rate": 6.250167294612911e-06, "epoch": 1.9000607472311732, "total_flos": 1813272106481786880, "step": 472300 }, { "loss": 3.165, "learning_rate": 6.249356169216981e-06, "epoch": 1.900463046775368, "total_flos": 1813677035590164480, "step": 472400 }, { "loss": 3.2425, "learning_rate": 6.248545043821051e-06, "epoch": 1.9008653463195626, "total_flos": 1814054170967900160, "step": 472500 }, { "loss": 3.1975, "learning_rate": 6.24773391842512e-06, "epoch": 1.9012676458637572, "total_flos": 1814451361596334080, "step": 472600 }, { "loss": 3.165, "learning_rate": 6.246922793029189e-06, "epoch": 1.901669945407952, "total_flos": 1814841775079669760, "step": 472700 }, { "loss": 3.16, "learning_rate": 6.246111667633258e-06, "epoch": 1.9020722449521466, "total_flos": 1815221927242997760, "step": 472800 }, { "loss": 3.17, "learning_rate": 6.245300542237328e-06, "epoch": 1.9024745444963411, "total_flos": 1815614013767639040, "step": 472900 }, { "loss": 3.1825, "learning_rate": 6.2444894168413975e-06, "epoch": 1.9028768440405357, "total_flos": 1816001506067742720, "step": 473000 }, { "loss": 3.21, "learning_rate": 6.243678291445467e-06, "epoch": 1.9032791435847303, "total_flos": 1816392503787724800, "step": 473100 }, { "loss": 3.2125, "learning_rate": 6.242867166049536e-06, "epoch": 1.9036814431289248, "total_flos": 1816764944027320320, "step": 473200 }, { "loss": 3.22, "learning_rate": 6.242056040653606e-06, "epoch": 1.9040837426731194, "total_flos": 1817153849117859840, "step": 473300 }, { "loss": 3.16, "learning_rate": 6.241244915257675e-06, "epoch": 1.9044860422173142, "total_flos": 1817528833442488320, "step": 473400 }, { "loss": 3.1975, "learning_rate": 6.240433789861744e-06, "epoch": 1.9048883417615088, "total_flos": 1817908799712337920, "step": 473500 }, { "loss": 3.2025, "learning_rate": 6.2396226644658135e-06, "epoch": 1.9052906413057034, "total_flos": 1818297030275112960, "step": 473600 }, { "loss": 3.1925, "learning_rate": 6.2388115390698835e-06, "epoch": 1.9056929408498982, "total_flos": 1818676045832601600, "step": 473700 }, { "loss": 3.15, "learning_rate": 6.238000413673953e-06, "epoch": 1.9060952403940927, "total_flos": 1819064786274631680, "step": 473800 }, { "loss": 3.1975, "learning_rate": 6.237189288278022e-06, "epoch": 1.9064975399382873, "total_flos": 1819444099261685760, "step": 473900 }, { "loss": 3.195, "learning_rate": 6.236378162882091e-06, "epoch": 1.9068998394824819, "total_flos": 1819829121834147840, "step": 474000 }, { "loss": 3.18, "learning_rate": 6.235567037486161e-06, "epoch": 1.9073021390266764, "total_flos": 1820224426971586560, "step": 474100 }, { "loss": 3.1975, "learning_rate": 6.23475591209023e-06, "epoch": 1.907704438570871, "total_flos": 1820607128531189760, "step": 474200 }, { "loss": 3.2025, "learning_rate": 6.2339447866942995e-06, "epoch": 1.9081067381150656, "total_flos": 1820989644197314560, "step": 474300 }, { "loss": 3.2325, "learning_rate": 6.233133661298369e-06, "epoch": 1.9085090376592602, "total_flos": 1821380530381209600, "step": 474400 }, { "loss": 3.1825, "learning_rate": 6.232322535902439e-06, "epoch": 1.908911337203455, "total_flos": 1821767427822182400, "step": 474500 }, { "loss": 3.2, "learning_rate": 6.231511410506508e-06, "epoch": 1.9093136367476495, "total_flos": 1822156656898498560, "step": 474600 }, { "loss": 3.175, "learning_rate": 6.230700285110577e-06, "epoch": 1.9097159362918443, "total_flos": 1822538928247480320, "step": 474700 }, { "loss": 3.225, "learning_rate": 6.229889159714647e-06, "epoch": 1.910118235836039, "total_flos": 1822932188556656640, "step": 474800 }, { "loss": 3.1875, "learning_rate": 6.229078034318716e-06, "epoch": 1.9105205353802335, "total_flos": 1823318172463964160, "step": 474900 }, { "loss": 3.19, "learning_rate": 6.228266908922786e-06, "epoch": 1.910922834924428, "total_flos": 1823701144896921600, "step": 475000 }, { "loss": 3.19, "learning_rate": 6.227455783526855e-06, "epoch": 1.9113251344686226, "total_flos": 1824082991346524160, "step": 475100 }, { "loss": 3.2675, "learning_rate": 6.226644658130925e-06, "epoch": 1.9117274340128172, "total_flos": 1824466059381841920, "step": 475200 }, { "loss": 3.24, "learning_rate": 6.225833532734994e-06, "epoch": 1.9121297335570118, "total_flos": 1824849138039644160, "step": 475300 }, { "loss": 3.1425, "learning_rate": 6.225022407339063e-06, "epoch": 1.9125320331012063, "total_flos": 1825240996180869120, "step": 475400 }, { "loss": 3.1825, "learning_rate": 6.2242112819431324e-06, "epoch": 1.9129343326454011, "total_flos": 1825617154290032640, "step": 475500 }, { "loss": 3.24, "learning_rate": 6.2234001565472025e-06, "epoch": 1.9133366321895957, "total_flos": 1825981085919559680, "step": 475600 }, { "loss": 3.1575, "learning_rate": 6.222589031151272e-06, "epoch": 1.9137389317337905, "total_flos": 1826363447559659520, "step": 475700 }, { "loss": 3.155, "learning_rate": 6.221777905755341e-06, "epoch": 1.914141231277985, "total_flos": 1826747099831623680, "step": 475800 }, { "loss": 3.2025, "learning_rate": 6.22096678035941e-06, "epoch": 1.9145435308221797, "total_flos": 1827135590645268480, "step": 475900 }, { "loss": 3.215, "learning_rate": 6.22015565496348e-06, "epoch": 1.9149458303663742, "total_flos": 1827529323655004160, "step": 476000 }, { "loss": 3.265, "learning_rate": 6.219344529567549e-06, "epoch": 1.9153481299105688, "total_flos": 1827919296305233920, "step": 476100 }, { "loss": 3.23, "learning_rate": 6.2185334041716185e-06, "epoch": 1.9157504294547634, "total_flos": 1828300022082723840, "step": 476200 }, { "loss": 3.22, "learning_rate": 6.217722278775688e-06, "epoch": 1.916152728998958, "total_flos": 1828698365250723840, "step": 476300 }, { "loss": 3.1975, "learning_rate": 6.216911153379758e-06, "epoch": 1.9165550285431525, "total_flos": 1829064479800811520, "step": 476400 }, { "loss": 3.1775, "learning_rate": 6.216100027983827e-06, "epoch": 1.9169573280873473, "total_flos": 1829441811694510080, "step": 476500 }, { "loss": 3.1775, "learning_rate": 6.215288902587896e-06, "epoch": 1.9173596276315419, "total_flos": 1829836824713625600, "step": 476600 }, { "loss": 3.2175, "learning_rate": 6.214477777191965e-06, "epoch": 1.9177619271757365, "total_flos": 1830235444066222080, "step": 476700 }, { "loss": 3.205, "learning_rate": 6.213666651796035e-06, "epoch": 1.9181642267199313, "total_flos": 1830606200642027520, "step": 476800 }, { "loss": 3.185, "learning_rate": 6.2128555264001046e-06, "epoch": 1.9185665262641258, "total_flos": 1830990224700948480, "step": 476900 }, { "loss": 3.185, "learning_rate": 6.212044401004174e-06, "epoch": 1.9189688258083204, "total_flos": 1831372379202600960, "step": 477000 }, { "loss": 3.165, "learning_rate": 6.211233275608243e-06, "epoch": 1.919371125352515, "total_flos": 1831772363544453120, "step": 477100 }, { "loss": 3.2375, "learning_rate": 6.210422150212313e-06, "epoch": 1.9197734248967095, "total_flos": 1832174557363077120, "step": 477200 }, { "loss": 3.19, "learning_rate": 6.209611024816382e-06, "epoch": 1.9201757244409041, "total_flos": 1832559399353303040, "step": 477300 }, { "loss": 3.21, "learning_rate": 6.208799899420451e-06, "epoch": 1.9205780239850987, "total_flos": 1832951092846018560, "step": 477400 }, { "loss": 3.13, "learning_rate": 6.207988774024521e-06, "epoch": 1.9209803235292935, "total_flos": 1833334044034007040, "step": 477500 }, { "loss": 3.16, "learning_rate": 6.207177648628591e-06, "epoch": 1.921382623073488, "total_flos": 1833697954418565120, "step": 477600 }, { "loss": 3.2, "learning_rate": 6.20636652323266e-06, "epoch": 1.9217849226176826, "total_flos": 1834104121046384640, "step": 477700 }, { "loss": 3.24, "learning_rate": 6.205555397836729e-06, "epoch": 1.9221872221618774, "total_flos": 1834491491187916800, "step": 477800 }, { "loss": 3.135, "learning_rate": 6.204744272440798e-06, "epoch": 1.922589521706072, "total_flos": 1834898863467724800, "step": 477900 }, { "loss": 3.2625, "learning_rate": 6.203933147044868e-06, "epoch": 1.9229918212502666, "total_flos": 1835279286504407040, "step": 478000 }, { "loss": 3.185, "learning_rate": 6.2031220216489375e-06, "epoch": 1.9233941207944611, "total_flos": 1835669105128611840, "step": 478100 }, { "loss": 3.16, "learning_rate": 6.202310896253007e-06, "epoch": 1.9237964203386557, "total_flos": 1836046107725291520, "step": 478200 }, { "loss": 3.2225, "learning_rate": 6.201499770857076e-06, "epoch": 1.9241987198828503, "total_flos": 1836432314704773120, "step": 478300 }, { "loss": 3.195, "learning_rate": 6.200688645461146e-06, "epoch": 1.9246010194270449, "total_flos": 1836809083606794240, "step": 478400 }, { "loss": 3.21, "learning_rate": 6.199877520065215e-06, "epoch": 1.9250033189712394, "total_flos": 1837183701455708160, "step": 478500 }, { "loss": 3.215, "learning_rate": 6.199066394669284e-06, "epoch": 1.9254056185154342, "total_flos": 1837584216921784320, "step": 478600 }, { "loss": 3.185, "learning_rate": 6.1982552692733535e-06, "epoch": 1.9258079180596288, "total_flos": 1837966238642380800, "step": 478700 }, { "loss": 3.225, "learning_rate": 6.1974441438774235e-06, "epoch": 1.9262102176038236, "total_flos": 1838376935759831040, "step": 478800 }, { "loss": 3.23, "learning_rate": 6.196633018481493e-06, "epoch": 1.9266125171480182, "total_flos": 1838775124901806080, "step": 478900 }, { "loss": 3.1875, "learning_rate": 6.195821893085562e-06, "epoch": 1.9270148166922128, "total_flos": 1839148595522396160, "step": 479000 }, { "loss": 3.235, "learning_rate": 6.195010767689631e-06, "epoch": 1.9274171162364073, "total_flos": 1839545276271575040, "step": 479100 }, { "loss": 3.2275, "learning_rate": 6.194199642293701e-06, "epoch": 1.927819415780602, "total_flos": 1839947735652311040, "step": 479200 }, { "loss": 3.195, "learning_rate": 6.19338851689777e-06, "epoch": 1.9282217153247965, "total_flos": 1840326623739985920, "step": 479300 }, { "loss": 3.25, "learning_rate": 6.1925773915018395e-06, "epoch": 1.928624014868991, "total_flos": 1840715226089717760, "step": 479400 }, { "loss": 3.2125, "learning_rate": 6.19176626610591e-06, "epoch": 1.9290263144131856, "total_flos": 1841106765556408320, "step": 479500 }, { "loss": 3.215, "learning_rate": 6.190955140709979e-06, "epoch": 1.9294286139573804, "total_flos": 1841479901568737280, "step": 479600 }, { "loss": 3.1675, "learning_rate": 6.190144015314048e-06, "epoch": 1.929830913501575, "total_flos": 1841859368581816320, "step": 479700 }, { "loss": 3.175, "learning_rate": 6.189332889918117e-06, "epoch": 1.9302332130457698, "total_flos": 1842239541990113280, "step": 479800 }, { "loss": 3.1925, "learning_rate": 6.188521764522187e-06, "epoch": 1.9306355125899644, "total_flos": 1842622185126051840, "step": 479900 }, { "loss": 3.1625, "learning_rate": 6.187710639126256e-06, "epoch": 1.931037812134159, "total_flos": 1842996532101611520, "step": 480000 }, { "loss": 3.18, "learning_rate": 6.186899513730326e-06, "epoch": 1.9314401116783535, "total_flos": 1843395990630481920, "step": 480100 }, { "loss": 3.175, "learning_rate": 6.186088388334395e-06, "epoch": 1.931842411222548, "total_flos": 1843773184431882240, "step": 480200 }, { "loss": 3.225, "learning_rate": 6.185277262938465e-06, "epoch": 1.9322447107667426, "total_flos": 1844153692448440320, "step": 480300 }, { "loss": 3.1675, "learning_rate": 6.184466137542534e-06, "epoch": 1.9326470103109372, "total_flos": 1844540133122580480, "step": 480400 }, { "loss": 3.1775, "learning_rate": 6.183655012146603e-06, "epoch": 1.9330493098551318, "total_flos": 1844928060944547840, "step": 480500 }, { "loss": 3.1675, "learning_rate": 6.1828438867506724e-06, "epoch": 1.9334516093993266, "total_flos": 1845316169348751360, "step": 480600 }, { "loss": 3.2075, "learning_rate": 6.1820327613547425e-06, "epoch": 1.9338539089435212, "total_flos": 1845703321729351680, "step": 480700 }, { "loss": 3.2, "learning_rate": 6.181221635958812e-06, "epoch": 1.9342562084877157, "total_flos": 1846095227671756800, "step": 480800 }, { "loss": 3.1575, "learning_rate": 6.180410510562881e-06, "epoch": 1.9346585080319105, "total_flos": 1846478226660925440, "step": 480900 }, { "loss": 3.195, "learning_rate": 6.17959938516695e-06, "epoch": 1.935060807576105, "total_flos": 1846842694725918720, "step": 481000 }, { "loss": 3.1125, "learning_rate": 6.17878825977102e-06, "epoch": 1.9354631071202997, "total_flos": 1847214710066135040, "step": 481100 }, { "loss": 3.215, "learning_rate": 6.177977134375089e-06, "epoch": 1.9358654066644942, "total_flos": 1847596030702755840, "step": 481200 }, { "loss": 3.1925, "learning_rate": 6.1771660089791585e-06, "epoch": 1.9362677062086888, "total_flos": 1847986789416837120, "step": 481300 }, { "loss": 3.22, "learning_rate": 6.176354883583228e-06, "epoch": 1.9366700057528834, "total_flos": 1848371541115944960, "step": 481400 }, { "loss": 3.135, "learning_rate": 6.175543758187298e-06, "epoch": 1.937072305297078, "total_flos": 1848753950557224960, "step": 481500 }, { "loss": 3.1625, "learning_rate": 6.174732632791367e-06, "epoch": 1.9374746048412728, "total_flos": 1849150445412925440, "step": 481600 }, { "loss": 3.1725, "learning_rate": 6.173921507395436e-06, "epoch": 1.9378769043854673, "total_flos": 1849548847004590080, "step": 481700 }, { "loss": 3.1825, "learning_rate": 6.173110381999505e-06, "epoch": 1.938279203929662, "total_flos": 1849937529022955520, "step": 481800 }, { "loss": 3.1925, "learning_rate": 6.172299256603575e-06, "epoch": 1.9386815034738567, "total_flos": 1850327528229396480, "step": 481900 }, { "loss": 3.175, "learning_rate": 6.1714881312076446e-06, "epoch": 1.9390838030180513, "total_flos": 1850699102736506880, "step": 482000 }, { "loss": 3.2075, "learning_rate": 6.170677005811714e-06, "epoch": 1.9394861025622459, "total_flos": 1851095470122393600, "step": 482100 }, { "loss": 3.1625, "learning_rate": 6.169865880415783e-06, "epoch": 1.9398884021064404, "total_flos": 1851495072054804480, "step": 482200 }, { "loss": 3.18, "learning_rate": 6.169054755019853e-06, "epoch": 1.940290701650635, "total_flos": 1851886245045780480, "step": 482300 }, { "loss": 3.225, "learning_rate": 6.168243629623922e-06, "epoch": 1.9406930011948296, "total_flos": 1852279914320609280, "step": 482400 }, { "loss": 3.21, "learning_rate": 6.167432504227991e-06, "epoch": 1.9410953007390241, "total_flos": 1852640287417835520, "step": 482500 }, { "loss": 3.21, "learning_rate": 6.166621378832061e-06, "epoch": 1.9414976002832187, "total_flos": 1853009211615068160, "step": 482600 }, { "loss": 3.1675, "learning_rate": 6.165810253436131e-06, "epoch": 1.9418998998274135, "total_flos": 1853385417525411840, "step": 482700 }, { "loss": 3.1925, "learning_rate": 6.1649991280402e-06, "epoch": 1.942302199371608, "total_flos": 1853761267582525440, "step": 482800 }, { "loss": 3.215, "learning_rate": 6.164188002644269e-06, "epoch": 1.9427044989158029, "total_flos": 1854140463722250240, "step": 482900 }, { "loss": 3.195, "learning_rate": 6.163376877248338e-06, "epoch": 1.9431067984599975, "total_flos": 1854528216273223680, "step": 483000 }, { "loss": 3.1575, "learning_rate": 6.162565751852408e-06, "epoch": 1.943509098004192, "total_flos": 1854918465108049920, "step": 483100 }, { "loss": 3.16, "learning_rate": 6.1617546264564775e-06, "epoch": 1.9439113975483866, "total_flos": 1855306339817594880, "step": 483200 }, { "loss": 3.15, "learning_rate": 6.160943501060547e-06, "epoch": 1.9443136970925812, "total_flos": 1855680214092595200, "step": 483300 }, { "loss": 3.1075, "learning_rate": 6.160132375664616e-06, "epoch": 1.9447159966367757, "total_flos": 1856077558747054080, "step": 483400 }, { "loss": 3.2, "learning_rate": 6.159321250268686e-06, "epoch": 1.9451182961809703, "total_flos": 1856464647392747520, "step": 483500 }, { "loss": 3.225, "learning_rate": 6.158510124872755e-06, "epoch": 1.945520595725165, "total_flos": 1856855055564840960, "step": 483600 }, { "loss": 3.175, "learning_rate": 6.157698999476824e-06, "epoch": 1.9459228952693597, "total_flos": 1857235722918666240, "step": 483700 }, { "loss": 3.16, "learning_rate": 6.1568878740808935e-06, "epoch": 1.9463251948135543, "total_flos": 1857609602504908800, "step": 483800 }, { "loss": 3.19, "learning_rate": 6.1560767486849635e-06, "epoch": 1.946727494357749, "total_flos": 1858008896385269760, "step": 483900 }, { "loss": 3.1525, "learning_rate": 6.155265623289033e-06, "epoch": 1.9471297939019436, "total_flos": 1858394545684316160, "step": 484000 }, { "loss": 3.23, "learning_rate": 6.154454497893102e-06, "epoch": 1.9475320934461382, "total_flos": 1858785909880012800, "step": 484100 }, { "loss": 3.145, "learning_rate": 6.153643372497172e-06, "epoch": 1.9479343929903328, "total_flos": 1859163815387873280, "step": 484200 }, { "loss": 3.18, "learning_rate": 6.152832247101241e-06, "epoch": 1.9483366925345273, "total_flos": 1859548668000583680, "step": 484300 }, { "loss": 3.1925, "learning_rate": 6.15202112170531e-06, "epoch": 1.948738992078722, "total_flos": 1859929749631303680, "step": 484400 }, { "loss": 3.205, "learning_rate": 6.1512099963093795e-06, "epoch": 1.9491412916229165, "total_flos": 1860312286542397440, "step": 484500 }, { "loss": 3.2025, "learning_rate": 6.15039887091345e-06, "epoch": 1.949543591167111, "total_flos": 1860713582761082880, "step": 484600 }, { "loss": 3.2075, "learning_rate": 6.149587745517519e-06, "epoch": 1.9499458907113059, "total_flos": 1861079676066201600, "step": 484700 }, { "loss": 3.165, "learning_rate": 6.148776620121588e-06, "epoch": 1.9503481902555004, "total_flos": 1861472081265377280, "step": 484800 }, { "loss": 3.1725, "learning_rate": 6.147965494725657e-06, "epoch": 1.950750489799695, "total_flos": 1861853858668830720, "step": 484900 }, { "loss": 3.215, "learning_rate": 6.147154369329727e-06, "epoch": 1.9511527893438898, "total_flos": 1862234653492469760, "step": 485000 }, { "loss": 3.165, "learning_rate": 6.146343243933796e-06, "epoch": 1.9515550888880844, "total_flos": 1862622198904995840, "step": 485100 }, { "loss": 3.2025, "learning_rate": 6.145532118537866e-06, "epoch": 1.951957388432279, "total_flos": 1863012814215536640, "step": 485200 }, { "loss": 3.205, "learning_rate": 6.144720993141935e-06, "epoch": 1.9523596879764735, "total_flos": 1863397624338309120, "step": 485300 }, { "loss": 3.16, "learning_rate": 6.143909867746005e-06, "epoch": 1.952761987520668, "total_flos": 1863779056511016960, "step": 485400 }, { "loss": 3.205, "learning_rate": 6.143098742350074e-06, "epoch": 1.9531642870648627, "total_flos": 1864168991982551040, "step": 485500 }, { "loss": 3.1725, "learning_rate": 6.142287616954143e-06, "epoch": 1.9535665866090572, "total_flos": 1864540890475438080, "step": 485600 }, { "loss": 3.1125, "learning_rate": 6.1414764915582124e-06, "epoch": 1.953968886153252, "total_flos": 1864911301820497920, "step": 485700 }, { "loss": 3.175, "learning_rate": 6.1406653661622825e-06, "epoch": 1.9543711856974466, "total_flos": 1865291491162521600, "step": 485800 }, { "loss": 3.185, "learning_rate": 6.139854240766352e-06, "epoch": 1.9547734852416412, "total_flos": 1865663548992675840, "step": 485900 }, { "loss": 3.125, "learning_rate": 6.139043115370421e-06, "epoch": 1.955175784785836, "total_flos": 1866037805677117440, "step": 486000 }, { "loss": 3.1075, "learning_rate": 6.13823198997449e-06, "epoch": 1.9555780843300306, "total_flos": 1866410129069383680, "step": 486100 }, { "loss": 3.2075, "learning_rate": 6.13742086457856e-06, "epoch": 1.9559803838742251, "total_flos": 1866794795788615680, "step": 486200 }, { "loss": 3.22, "learning_rate": 6.136609739182629e-06, "epoch": 1.9563826834184197, "total_flos": 1867177492036976640, "step": 486300 }, { "loss": 3.2175, "learning_rate": 6.1357986137866985e-06, "epoch": 1.9567849829626143, "total_flos": 1867554521189867520, "step": 486400 }, { "loss": 3.1275, "learning_rate": 6.134987488390768e-06, "epoch": 1.9571872825068088, "total_flos": 1867940027085373440, "step": 486500 }, { "loss": 3.2075, "learning_rate": 6.134176362994838e-06, "epoch": 1.9575895820510034, "total_flos": 1868335735877222400, "step": 486600 }, { "loss": 3.17, "learning_rate": 6.133365237598907e-06, "epoch": 1.957991881595198, "total_flos": 1868718681753968640, "step": 486700 }, { "loss": 3.125, "learning_rate": 6.132554112202976e-06, "epoch": 1.9583941811393928, "total_flos": 1869110608941342720, "step": 486800 }, { "loss": 3.0975, "learning_rate": 6.131742986807045e-06, "epoch": 1.9587964806835874, "total_flos": 1869485030274293760, "step": 486900 }, { "loss": 3.145, "learning_rate": 6.130931861411115e-06, "epoch": 1.9591987802277822, "total_flos": 1869865617959485440, "step": 487000 }, { "loss": 3.1325, "learning_rate": 6.1301207360151846e-06, "epoch": 1.9596010797719767, "total_flos": 1870261172725309440, "step": 487100 }, { "loss": 3.14, "learning_rate": 6.129309610619254e-06, "epoch": 1.9600033793161713, "total_flos": 1870652005796782080, "step": 487200 }, { "loss": 3.22, "learning_rate": 6.128498485223323e-06, "epoch": 1.9604056788603659, "total_flos": 1871055421201121280, "step": 487300 }, { "loss": 3.215, "learning_rate": 6.127687359827393e-06, "epoch": 1.9608079784045604, "total_flos": 1871440443773583360, "step": 487400 }, { "loss": 3.14, "learning_rate": 6.126876234431462e-06, "epoch": 1.961210277948755, "total_flos": 1871824016376913920, "step": 487500 }, { "loss": 3.1425, "learning_rate": 6.126065109035531e-06, "epoch": 1.9616125774929496, "total_flos": 1872207992634654720, "step": 487600 }, { "loss": 3.215, "learning_rate": 6.125253983639601e-06, "epoch": 1.9620148770371442, "total_flos": 1872582934469345280, "step": 487700 }, { "loss": 3.2325, "learning_rate": 6.124442858243671e-06, "epoch": 1.962417176581339, "total_flos": 1872980549997158400, "step": 487800 }, { "loss": 3.135, "learning_rate": 6.12363173284774e-06, "epoch": 1.9628194761255335, "total_flos": 1873371478670991360, "step": 487900 }, { "loss": 3.19, "learning_rate": 6.122820607451809e-06, "epoch": 1.9632217756697283, "total_flos": 1873750711989411840, "step": 488000 }, { "loss": 3.115, "learning_rate": 6.122009482055878e-06, "epoch": 1.963624075213923, "total_flos": 1874130428630876160, "step": 488100 }, { "loss": 3.16, "learning_rate": 6.121198356659948e-06, "epoch": 1.9640263747581175, "total_flos": 1874506740766064640, "step": 488200 }, { "loss": 3.235, "learning_rate": 6.1203872312640175e-06, "epoch": 1.964428674302312, "total_flos": 1874880811557027840, "step": 488300 }, { "loss": 3.18, "learning_rate": 6.119576105868087e-06, "epoch": 1.9648309738465066, "total_flos": 1875257160870912000, "step": 488400 }, { "loss": 3.1675, "learning_rate": 6.118764980472156e-06, "epoch": 1.9652332733907012, "total_flos": 1875629399283302400, "step": 488500 }, { "loss": 3.165, "learning_rate": 6.117953855076226e-06, "epoch": 1.9656355729348958, "total_flos": 1876016870338437120, "step": 488600 }, { "loss": 3.16, "learning_rate": 6.117142729680295e-06, "epoch": 1.9660378724790903, "total_flos": 1876398838946611200, "step": 488700 }, { "loss": 3.2325, "learning_rate": 6.116331604284364e-06, "epoch": 1.9664401720232851, "total_flos": 1876776786944409600, "step": 488800 }, { "loss": 3.1625, "learning_rate": 6.115520478888435e-06, "epoch": 1.9668424715674797, "total_flos": 1877159031737180160, "step": 488900 }, { "loss": 3.195, "learning_rate": 6.1147093534925035e-06, "epoch": 1.9672447711116743, "total_flos": 1877546168184053760, "step": 489000 }, { "loss": 3.165, "learning_rate": 6.113898228096573e-06, "epoch": 1.967647070655869, "total_flos": 1877926426572226560, "step": 489100 }, { "loss": 3.22, "learning_rate": 6.113087102700642e-06, "epoch": 1.9680493702000637, "total_flos": 1878308724477419520, "step": 489200 }, { "loss": 3.185, "learning_rate": 6.112275977304713e-06, "epoch": 1.9684516697442582, "total_flos": 1878690140716400640, "step": 489300 }, { "loss": 3.17, "learning_rate": 6.111464851908782e-06, "epoch": 1.9688539692884528, "total_flos": 1879061768335933440, "step": 489400 }, { "loss": 3.1525, "learning_rate": 6.11065372651285e-06, "epoch": 1.9692562688326474, "total_flos": 1879434362601553920, "step": 489500 }, { "loss": 3.1925, "learning_rate": 6.1098426011169195e-06, "epoch": 1.969658568376842, "total_flos": 1879816384322150400, "step": 489600 }, { "loss": 3.1675, "learning_rate": 6.1090314757209904e-06, "epoch": 1.9700608679210365, "total_flos": 1880187385215098880, "step": 489700 }, { "loss": 3.1675, "learning_rate": 6.10822035032506e-06, "epoch": 1.9704631674652313, "total_flos": 1880563187471032320, "step": 489800 }, { "loss": 3.2175, "learning_rate": 6.107409224929128e-06, "epoch": 1.9708654670094259, "total_flos": 1880938028392120320, "step": 489900 }, { "loss": 3.215, "learning_rate": 6.106598099533197e-06, "epoch": 1.9712677665536205, "total_flos": 1881317506027683840, "step": 490000 }, { "loss": 3.17, "learning_rate": 6.105786974137268e-06, "epoch": 1.9716700660978153, "total_flos": 1881695878924861440, "step": 490100 }, { "loss": 3.1825, "learning_rate": 6.104975848741337e-06, "epoch": 1.9720723656420098, "total_flos": 1882081815030988800, "step": 490200 }, { "loss": 3.215, "learning_rate": 6.104164723345406e-06, "epoch": 1.9724746651862044, "total_flos": 1882454669547479040, "step": 490300 }, { "loss": 3.2125, "learning_rate": 6.103353597949475e-06, "epoch": 1.972876964730399, "total_flos": 1882865122347786240, "step": 490400 }, { "loss": 3.1925, "learning_rate": 6.102542472553546e-06, "epoch": 1.9732792642745935, "total_flos": 1883234715761541120, "step": 490500 }, { "loss": 3.19, "learning_rate": 6.101731347157615e-06, "epoch": 1.9736815638187881, "total_flos": 1883625060198727680, "step": 490600 }, { "loss": 3.235, "learning_rate": 6.100920221761683e-06, "epoch": 1.9740838633629827, "total_flos": 1883993872859873280, "step": 490700 }, { "loss": 3.1575, "learning_rate": 6.1001090963657524e-06, "epoch": 1.9744861629071773, "total_flos": 1884381168644014080, "step": 490800 }, { "loss": 3.2025, "learning_rate": 6.099297970969823e-06, "epoch": 1.974888462451372, "total_flos": 1884755026985287680, "step": 490900 }, { "loss": 3.1975, "learning_rate": 6.0984868455738925e-06, "epoch": 1.9752907619955666, "total_flos": 1885131238206873600, "step": 491000 }, { "loss": 3.2175, "learning_rate": 6.097675720177961e-06, "epoch": 1.9756930615397614, "total_flos": 1885527807419965440, "step": 491100 }, { "loss": 3.0825, "learning_rate": 6.09686459478203e-06, "epoch": 1.976095361083956, "total_flos": 1885923930488709120, "step": 491200 }, { "loss": 3.175, "learning_rate": 6.096053469386101e-06, "epoch": 1.9764976606281506, "total_flos": 1886305973454274560, "step": 491300 }, { "loss": 3.1675, "learning_rate": 6.09524234399017e-06, "epoch": 1.9768999601723451, "total_flos": 1886715156867686400, "step": 491400 }, { "loss": 3.135, "learning_rate": 6.0944312185942385e-06, "epoch": 1.9773022597165397, "total_flos": 1887106446705991680, "step": 491500 }, { "loss": 3.1575, "learning_rate": 6.093620093198308e-06, "epoch": 1.9777045592607343, "total_flos": 1887492977671249920, "step": 491600 }, { "loss": 3.2325, "learning_rate": 6.092808967802379e-06, "epoch": 1.9781068588049289, "total_flos": 1887880544328744960, "step": 491700 }, { "loss": 3.175, "learning_rate": 6.091997842406448e-06, "epoch": 1.9785091583491234, "total_flos": 1888262762565304320, "step": 491800 }, { "loss": 3.1525, "learning_rate": 6.091186717010517e-06, "epoch": 1.9789114578933182, "total_flos": 1888649729052426240, "step": 491900 }, { "loss": 3.235, "learning_rate": 6.090375591614585e-06, "epoch": 1.9793137574375128, "total_flos": 1889028277220597760, "step": 492000 }, { "loss": 3.2, "learning_rate": 6.089564466218656e-06, "epoch": 1.9797160569817076, "total_flos": 1889420698353500160, "step": 492100 }, { "loss": 3.185, "learning_rate": 6.088753340822725e-06, "epoch": 1.9801183565259022, "total_flos": 1889813220400005120, "step": 492200 }, { "loss": 3.1725, "learning_rate": 6.087942215426795e-06, "epoch": 1.9805206560700968, "total_flos": 1890190520426250240, "step": 492300 }, { "loss": 3.19, "learning_rate": 6.087131090030863e-06, "epoch": 1.9809229556142913, "total_flos": 1890567538956656640, "step": 492400 }, { "loss": 3.15, "learning_rate": 6.086319964634934e-06, "epoch": 1.981325255158486, "total_flos": 1890964007256145920, "step": 492500 }, { "loss": 3.15, "learning_rate": 6.085508839239003e-06, "epoch": 1.9817275547026805, "total_flos": 1891357878358179840, "step": 492600 }, { "loss": 3.1325, "learning_rate": 6.084697713843072e-06, "epoch": 1.982129854246875, "total_flos": 1891740123150950400, "step": 492700 }, { "loss": 3.225, "learning_rate": 6.083886588447141e-06, "epoch": 1.9825321537910696, "total_flos": 1892126659427450880, "step": 492800 }, { "loss": 3.14, "learning_rate": 6.0830754630512115e-06, "epoch": 1.9829344533352644, "total_flos": 1892505664362455040, "step": 492900 }, { "loss": 3.2, "learning_rate": 6.082264337655281e-06, "epoch": 1.983336752879459, "total_flos": 1892875778277949440, "step": 493000 }, { "loss": 3.215, "learning_rate": 6.08145321225935e-06, "epoch": 1.9837390524236536, "total_flos": 1893257900912148480, "step": 493100 }, { "loss": 3.165, "learning_rate": 6.080642086863418e-06, "epoch": 1.9841413519678484, "total_flos": 1893640878656348160, "step": 493200 }, { "loss": 3.1475, "learning_rate": 6.079830961467489e-06, "epoch": 1.984543651512043, "total_flos": 1894020435960545280, "step": 493300 }, { "loss": 3.21, "learning_rate": 6.079019836071558e-06, "epoch": 1.9849459510562375, "total_flos": 1894405437288038400, "step": 493400 }, { "loss": 3.24, "learning_rate": 6.0782087106756275e-06, "epoch": 1.985348250600432, "total_flos": 1894797545057648640, "step": 493500 }, { "loss": 3.1425, "learning_rate": 6.0773975852796975e-06, "epoch": 1.9857505501446266, "total_flos": 1895177638797312000, "step": 493600 }, { "loss": 3.125, "learning_rate": 6.076586459883767e-06, "epoch": 1.9861528496888212, "total_flos": 1895568301909032960, "step": 493700 }, { "loss": 3.1625, "learning_rate": 6.075775334487836e-06, "epoch": 1.9865551492330158, "total_flos": 1895963665470136320, "step": 493800 }, { "loss": 3.1425, "learning_rate": 6.074964209091905e-06, "epoch": 1.9869574487772106, "total_flos": 1896364122512547840, "step": 493900 }, { "loss": 3.1425, "learning_rate": 6.074153083695975e-06, "epoch": 1.9873597483214052, "total_flos": 1896728664934932480, "step": 494000 }, { "loss": 3.1525, "learning_rate": 6.073341958300044e-06, "epoch": 1.9877620478655997, "total_flos": 1897101439782789120, "step": 494100 }, { "loss": 3.17, "learning_rate": 6.0725308329041136e-06, "epoch": 1.9881643474097945, "total_flos": 1897485575377797120, "step": 494200 }, { "loss": 3.155, "learning_rate": 6.071719707508183e-06, "epoch": 1.988566646953989, "total_flos": 1897873837808025600, "step": 494300 }, { "loss": 3.13, "learning_rate": 6.070908582112253e-06, "epoch": 1.9889689464981837, "total_flos": 1898249863136133120, "step": 494400 }, { "loss": 3.155, "learning_rate": 6.070097456716322e-06, "epoch": 1.9893712460423782, "total_flos": 1898631943280394240, "step": 494500 }, { "loss": 3.165, "learning_rate": 6.069286331320391e-06, "epoch": 1.9897735455865728, "total_flos": 1899022595769630720, "step": 494600 }, { "loss": 3.1875, "learning_rate": 6.06847520592446e-06, "epoch": 1.9901758451307674, "total_flos": 1899410194294579200, "step": 494700 }, { "loss": 3.1675, "learning_rate": 6.0676640805285304e-06, "epoch": 1.990578144674962, "total_flos": 1899789719731322880, "step": 494800 }, { "loss": 3.175, "learning_rate": 6.0668529551326e-06, "epoch": 1.9909804442191565, "total_flos": 1900182082440560640, "step": 494900 }, { "loss": 3.1925, "learning_rate": 6.066041829736669e-06, "epoch": 1.9913827437633513, "total_flos": 1900561698168422400, "step": 495000 }, { "loss": 3.2225, "learning_rate": 6.065230704340738e-06, "epoch": 1.991785043307546, "total_flos": 1900951044092067840, "step": 495100 }, { "loss": 3.195, "learning_rate": 6.064419578944808e-06, "epoch": 1.9921873428517407, "total_flos": 1901322560175513600, "step": 495200 }, { "loss": 3.1075, "learning_rate": 6.063608453548877e-06, "epoch": 1.9925896423959353, "total_flos": 1901717775021834240, "step": 495300 }, { "loss": 3.185, "learning_rate": 6.0627973281529465e-06, "epoch": 1.9929919419401299, "total_flos": 1902114211453870080, "step": 495400 }, { "loss": 3.1925, "learning_rate": 6.061986202757016e-06, "epoch": 1.9933942414843244, "total_flos": 1902507774503854080, "step": 495500 }, { "loss": 3.1575, "learning_rate": 6.061175077361086e-06, "epoch": 1.993796541028519, "total_flos": 1902889477549916160, "step": 495600 }, { "loss": 3.2, "learning_rate": 6.060363951965155e-06, "epoch": 1.9941988405727136, "total_flos": 1903264355649699840, "step": 495700 }, { "loss": 3.15, "learning_rate": 6.059552826569224e-06, "epoch": 1.9946011401169081, "total_flos": 1903639132835880960, "step": 495800 }, { "loss": 3.17, "learning_rate": 6.058741701173293e-06, "epoch": 1.9950034396611027, "total_flos": 1904029201088471040, "step": 495900 }, { "loss": 3.1875, "learning_rate": 6.057930575777363e-06, "epoch": 1.9954057392052975, "total_flos": 1904401572281917440, "step": 496000 }, { "loss": 3.185, "learning_rate": 6.0571194503814325e-06, "epoch": 1.995808038749492, "total_flos": 1904771787111014400, "step": 496100 }, { "loss": 3.135, "learning_rate": 6.056308324985502e-06, "epoch": 1.9962103382936867, "total_flos": 1905151562176143360, "step": 496200 }, { "loss": 3.1525, "learning_rate": 6.055497199589571e-06, "epoch": 1.9966126378378815, "total_flos": 1905534847972392960, "step": 496300 }, { "loss": 3.155, "learning_rate": 6.054686074193641e-06, "epoch": 1.997014937382076, "total_flos": 1905910905167953920, "step": 496400 }, { "loss": 3.17, "learning_rate": 6.05387494879771e-06, "epoch": 1.9974172369262706, "total_flos": 1906279239817297920, "step": 496500 }, { "loss": 3.195, "learning_rate": 6.053063823401779e-06, "epoch": 1.9978195364704652, "total_flos": 1906678560253870080, "step": 496600 }, { "loss": 3.175, "learning_rate": 6.0522526980058485e-06, "epoch": 1.9982218360146597, "total_flos": 1907049603636756480, "step": 496700 }, { "loss": 3.1675, "learning_rate": 6.051441572609919e-06, "epoch": 1.9986241355588543, "total_flos": 1907445046866493440, "step": 496800 }, { "loss": 3.165, "learning_rate": 6.050630447213988e-06, "epoch": 1.999026435103049, "total_flos": 1907820541070376960, "step": 496900 }, { "loss": 3.155, "learning_rate": 6.049819321818057e-06, "epoch": 1.9994287346472437, "total_flos": 1908207555358679040, "step": 497000 }, { "loss": 3.1075, "learning_rate": 6.049008196422126e-06, "epoch": 1.9998310341914383, "total_flos": 1908611735581900800, "step": 497100 }, { "loss": 3.145, "learning_rate": 6.048197071026196e-06, "epoch": 2.000233333735633, "total_flos": 1908986000233205760, "step": 497200 }, { "loss": 3.17, "learning_rate": 6.047385945630265e-06, "epoch": 2.0006356332798276, "total_flos": 1909369227605790720, "step": 497300 }, { "loss": 3.1, "learning_rate": 6.046574820234335e-06, "epoch": 2.001037932824022, "total_flos": 1909758355768504320, "step": 497400 }, { "loss": 3.1975, "learning_rate": 6.045763694838404e-06, "epoch": 2.0014402323682168, "total_flos": 1910131576760709120, "step": 497500 }, { "loss": 3.175, "learning_rate": 6.044952569442474e-06, "epoch": 2.0018425319124113, "total_flos": 1910520843015720960, "step": 497600 }, { "loss": 3.1725, "learning_rate": 6.044141444046543e-06, "epoch": 2.002244831456606, "total_flos": 1910904681181163520, "step": 497700 }, { "loss": 3.2, "learning_rate": 6.043330318650612e-06, "epoch": 2.0026471310008005, "total_flos": 1911289193874370560, "step": 497800 }, { "loss": 3.1875, "learning_rate": 6.0425191932546814e-06, "epoch": 2.003049430544995, "total_flos": 1911691987863367680, "step": 497900 }, { "loss": 3.14, "learning_rate": 6.0417080678587515e-06, "epoch": 2.0034517300891896, "total_flos": 1912076367775518720, "step": 498000 }, { "loss": 3.1275, "learning_rate": 6.040896942462821e-06, "epoch": 2.003854029633384, "total_flos": 1912463706049597440, "step": 498100 }, { "loss": 3.18, "learning_rate": 6.04008581706689e-06, "epoch": 2.0042563291775792, "total_flos": 1912844824859013120, "step": 498200 }, { "loss": 3.1475, "learning_rate": 6.039274691670959e-06, "epoch": 2.004658628721774, "total_flos": 1913226049893273600, "step": 498300 }, { "loss": 3.1575, "learning_rate": 6.038463566275029e-06, "epoch": 2.0050609282659684, "total_flos": 1913627006192455680, "step": 498400 }, { "loss": 3.3, "learning_rate": 6.037652440879098e-06, "epoch": 2.005463227810163, "total_flos": 1913994098011115520, "step": 498500 }, { "loss": 3.2075, "learning_rate": 6.0368413154831675e-06, "epoch": 2.0058655273543575, "total_flos": 1914373756228915200, "step": 498600 }, { "loss": 3.115, "learning_rate": 6.0360301900872375e-06, "epoch": 2.006267826898552, "total_flos": 1914764366228213760, "step": 498700 }, { "loss": 3.1925, "learning_rate": 6.035219064691307e-06, "epoch": 2.0066701264427467, "total_flos": 1915162751886151680, "step": 498800 }, { "loss": 3.19, "learning_rate": 6.034407939295376e-06, "epoch": 2.0070724259869412, "total_flos": 1915540471500533760, "step": 498900 }, { "loss": 3.1075, "learning_rate": 6.033596813899445e-06, "epoch": 2.007474725531136, "total_flos": 1915946914312949760, "step": 499000 }, { "loss": 3.23, "learning_rate": 6.032785688503515e-06, "epoch": 2.0078770250753304, "total_flos": 1916336339905228800, "step": 499100 }, { "loss": 3.175, "learning_rate": 6.031974563107584e-06, "epoch": 2.0082793246195254, "total_flos": 1916709279401594880, "step": 499200 }, { "loss": 3.1425, "learning_rate": 6.0311634377116536e-06, "epoch": 2.00868162416372, "total_flos": 1917096585808220160, "step": 499300 }, { "loss": 3.14, "learning_rate": 6.030352312315723e-06, "epoch": 2.0090839237079146, "total_flos": 1917462397617500160, "step": 499400 }, { "loss": 3.175, "learning_rate": 6.029541186919793e-06, "epoch": 2.009486223252109, "total_flos": 1917850171413442560, "step": 499500 }, { "loss": 3.16, "learning_rate": 6.028730061523862e-06, "epoch": 2.0098885227963037, "total_flos": 1918235810090004480, "step": 499600 }, { "loss": 3.14, "learning_rate": 6.027918936127931e-06, "epoch": 2.0102908223404983, "total_flos": 1918633420306575360, "step": 499700 }, { "loss": 3.125, "learning_rate": 6.027107810732e-06, "epoch": 2.010693121884693, "total_flos": 1919029219389542400, "step": 499800 }, { "loss": 3.115, "learning_rate": 6.0262966853360704e-06, "epoch": 2.0110954214288874, "total_flos": 1919407725067776000, "step": 499900 }, { "loss": 3.1275, "learning_rate": 6.02548555994014e-06, "epoch": 2.011497720973082, "total_flos": 1919790389448683520, "step": 500000 }, { "loss": 3.085, "learning_rate": 6.024674434544209e-06, "epoch": 2.0119000205172766, "total_flos": 1920172777644994560, "step": 500100 }, { "loss": 3.1125, "learning_rate": 6.023863309148278e-06, "epoch": 2.0123023200614716, "total_flos": 1920535625781104640, "step": 500200 }, { "loss": 3.17, "learning_rate": 6.023052183752348e-06, "epoch": 2.012704619605666, "total_flos": 1920923856343879680, "step": 500300 }, { "loss": 3.1775, "learning_rate": 6.022241058356417e-06, "epoch": 2.0131069191498607, "total_flos": 1921303254310809600, "step": 500400 }, { "loss": 3.15, "learning_rate": 6.0214299329604865e-06, "epoch": 2.0135092186940553, "total_flos": 1921693699661598720, "step": 500500 }, { "loss": 3.165, "learning_rate": 6.020618807564556e-06, "epoch": 2.01391151823825, "total_flos": 1922071658281881600, "step": 500600 }, { "loss": 3.16, "learning_rate": 6.019807682168626e-06, "epoch": 2.0143138177824444, "total_flos": 1922467515788513280, "step": 500700 }, { "loss": 3.1825, "learning_rate": 6.018996556772695e-06, "epoch": 2.014716117326639, "total_flos": 1922860961991168000, "step": 500800 }, { "loss": 3.155, "learning_rate": 6.018185431376764e-06, "epoch": 2.0151184168708336, "total_flos": 1923242840308224000, "step": 500900 }, { "loss": 3.1525, "learning_rate": 6.017374305980833e-06, "epoch": 2.015520716415028, "total_flos": 1923642463485603840, "step": 501000 }, { "loss": 3.185, "learning_rate": 6.016563180584903e-06, "epoch": 2.0159230159592227, "total_flos": 1924030327572664320, "step": 501100 }, { "loss": 3.1825, "learning_rate": 6.0157520551889725e-06, "epoch": 2.0163253155034173, "total_flos": 1924405593393131520, "step": 501200 }, { "loss": 3.14, "learning_rate": 6.014940929793042e-06, "epoch": 2.0167276150476123, "total_flos": 1924790525674475520, "step": 501300 }, { "loss": 3.155, "learning_rate": 6.014129804397111e-06, "epoch": 2.017129914591807, "total_flos": 1925168080640348160, "step": 501400 }, { "loss": 3.1075, "learning_rate": 6.013318679001181e-06, "epoch": 2.0175322141360015, "total_flos": 1925544499000381440, "step": 501500 }, { "loss": 3.195, "learning_rate": 6.01250755360525e-06, "epoch": 2.017934513680196, "total_flos": 1925912908007116800, "step": 501600 }, { "loss": 3.18, "learning_rate": 6.011696428209319e-06, "epoch": 2.0183368132243906, "total_flos": 1926295126243676160, "step": 501700 }, { "loss": 3.1525, "learning_rate": 6.0108853028133885e-06, "epoch": 2.018739112768585, "total_flos": 1926680361265827840, "step": 501800 }, { "loss": 3.14, "learning_rate": 6.010074177417459e-06, "epoch": 2.0191414123127798, "total_flos": 1927052785571696640, "step": 501900 }, { "loss": 3.185, "learning_rate": 6.009263052021528e-06, "epoch": 2.0195437118569743, "total_flos": 1927424221986508800, "step": 502000 }, { "loss": 3.185, "learning_rate": 6.008451926625597e-06, "epoch": 2.019946011401169, "total_flos": 1927803014471823360, "step": 502100 }, { "loss": 3.1575, "learning_rate": 6.007640801229666e-06, "epoch": 2.0203483109453635, "total_flos": 1928202552669327360, "step": 502200 }, { "loss": 3.1425, "learning_rate": 6.006829675833736e-06, "epoch": 2.0207506104895585, "total_flos": 1928578848870789120, "step": 502300 }, { "loss": 3.1025, "learning_rate": 6.006018550437805e-06, "epoch": 2.021152910033753, "total_flos": 1928955511547965440, "step": 502400 }, { "loss": 3.1425, "learning_rate": 6.005207425041875e-06, "epoch": 2.0215552095779477, "total_flos": 1929344140453908480, "step": 502500 }, { "loss": 3.1575, "learning_rate": 6.004396299645944e-06, "epoch": 2.0219575091221422, "total_flos": 1929748958026199040, "step": 502600 }, { "loss": 3.1775, "learning_rate": 6.003585174250014e-06, "epoch": 2.022359808666337, "total_flos": 1930129779406049280, "step": 502700 }, { "loss": 3.1875, "learning_rate": 6.002774048854083e-06, "epoch": 2.0227621082105314, "total_flos": 1930508948989562880, "step": 502800 }, { "loss": 3.1475, "learning_rate": 6.001962923458152e-06, "epoch": 2.023164407754726, "total_flos": 1930883789910650880, "step": 502900 }, { "loss": 3.1775, "learning_rate": 6.0011517980622214e-06, "epoch": 2.0235667072989205, "total_flos": 1931263400327270400, "step": 503000 }, { "loss": 3.1325, "learning_rate": 6.0003406726662915e-06, "epoch": 2.023969006843115, "total_flos": 1931659677422039040, "step": 503100 }, { "loss": 3.21, "learning_rate": 5.999529547270361e-06, "epoch": 2.0243713063873097, "total_flos": 1932040828098908160, "step": 503200 }, { "loss": 3.1225, "learning_rate": 5.99871842187443e-06, "epoch": 2.0247736059315047, "total_flos": 1932436584691937280, "step": 503300 }, { "loss": 3.13, "learning_rate": 5.9979072964785e-06, "epoch": 2.0251759054756993, "total_flos": 1932836324716646400, "step": 503400 }, { "loss": 3.1725, "learning_rate": 5.997096171082569e-06, "epoch": 2.025578205019894, "total_flos": 1933224608391843840, "step": 503500 }, { "loss": 3.17, "learning_rate": 5.996285045686638e-06, "epoch": 2.0259805045640884, "total_flos": 1933607857009397760, "step": 503600 }, { "loss": 3.205, "learning_rate": 5.9954739202907075e-06, "epoch": 2.026382804108283, "total_flos": 1933981486967255040, "step": 503700 }, { "loss": 3.1425, "learning_rate": 5.9946627948947775e-06, "epoch": 2.0267851036524775, "total_flos": 1934351951424737280, "step": 503800 }, { "loss": 3.1625, "learning_rate": 5.993851669498847e-06, "epoch": 2.027187403196672, "total_flos": 1934728114845143040, "step": 503900 }, { "loss": 3.1175, "learning_rate": 5.993040544102916e-06, "epoch": 2.0275897027408667, "total_flos": 1935111825540771840, "step": 504000 }, { "loss": 3.16, "learning_rate": 5.992229418706985e-06, "epoch": 2.0279920022850613, "total_flos": 1935502764837089280, "step": 504100 }, { "loss": 3.16, "learning_rate": 5.991418293311055e-06, "epoch": 2.028394301829256, "total_flos": 1935873441744261120, "step": 504200 }, { "loss": 3.1375, "learning_rate": 5.990607167915124e-06, "epoch": 2.028796601373451, "total_flos": 1936282927898480640, "step": 504300 }, { "loss": 3.165, "learning_rate": 5.9897960425191936e-06, "epoch": 2.0291989009176454, "total_flos": 1936696848939970560, "step": 504400 }, { "loss": 3.19, "learning_rate": 5.988984917123263e-06, "epoch": 2.02960120046184, "total_flos": 1937083427706408960, "step": 504500 }, { "loss": 3.16, "learning_rate": 5.988173791727333e-06, "epoch": 2.0300035000060346, "total_flos": 1937482875612794880, "step": 504600 }, { "loss": 3.1975, "learning_rate": 5.987362666331402e-06, "epoch": 2.030405799550229, "total_flos": 1937863298649477120, "step": 504700 }, { "loss": 3.1525, "learning_rate": 5.986551540935471e-06, "epoch": 2.0308080990944237, "total_flos": 1938244645842309120, "step": 504800 }, { "loss": 3.1075, "learning_rate": 5.98574041553954e-06, "epoch": 2.0312103986386183, "total_flos": 1938617505670041600, "step": 504900 }, { "loss": 3.12, "learning_rate": 5.9849292901436104e-06, "epoch": 2.031612698182813, "total_flos": 1939005210419834880, "step": 505000 }, { "loss": 3.15, "learning_rate": 5.98411816474768e-06, "epoch": 2.0320149977270074, "total_flos": 1939404435254046720, "step": 505100 }, { "loss": 3.175, "learning_rate": 5.983307039351749e-06, "epoch": 2.032417297271202, "total_flos": 1939789659653713920, "step": 505200 }, { "loss": 3.1275, "learning_rate": 5.982495913955818e-06, "epoch": 2.0328195968153966, "total_flos": 1940159784191692800, "step": 505300 }, { "loss": 3.1125, "learning_rate": 5.981684788559888e-06, "epoch": 2.0332218963595916, "total_flos": 1940539017510113280, "step": 505400 }, { "loss": 3.1725, "learning_rate": 5.980873663163957e-06, "epoch": 2.033624195903786, "total_flos": 1940906619208028160, "step": 505500 }, { "loss": 3.1425, "learning_rate": 5.9800625377680265e-06, "epoch": 2.0340264954479808, "total_flos": 1941294265534156800, "step": 505600 }, { "loss": 3.125, "learning_rate": 5.979251412372096e-06, "epoch": 2.0344287949921753, "total_flos": 1941673504163819520, "step": 505700 }, { "loss": 3.15, "learning_rate": 5.978440286976166e-06, "epoch": 2.03483109453637, "total_flos": 1942053656327147520, "step": 505800 }, { "loss": 3.145, "learning_rate": 5.977629161580235e-06, "epoch": 2.0352333940805645, "total_flos": 1942450076825456640, "step": 505900 }, { "loss": 3.1325, "learning_rate": 5.976818036184304e-06, "epoch": 2.035635693624759, "total_flos": 1942841860609290240, "step": 506000 }, { "loss": 3.165, "learning_rate": 5.976006910788373e-06, "epoch": 2.0360379931689536, "total_flos": 1943241712170086400, "step": 506100 }, { "loss": 3.1625, "learning_rate": 5.975195785392443e-06, "epoch": 2.036440292713148, "total_flos": 1943626230174535680, "step": 506200 }, { "loss": 3.18, "learning_rate": 5.9743846599965125e-06, "epoch": 2.0368425922573428, "total_flos": 1944002813183078400, "step": 506300 }, { "loss": 3.1925, "learning_rate": 5.973573534600582e-06, "epoch": 2.037244891801538, "total_flos": 1944404683015925760, "step": 506400 }, { "loss": 3.1275, "learning_rate": 5.972762409204651e-06, "epoch": 2.0376471913457324, "total_flos": 1944774706640302080, "step": 506500 }, { "loss": 3.135, "learning_rate": 5.971951283808721e-06, "epoch": 2.038049490889927, "total_flos": 1945165141368606720, "step": 506600 }, { "loss": 3.175, "learning_rate": 5.97114015841279e-06, "epoch": 2.0384517904341215, "total_flos": 1945557599680204800, "step": 506700 }, { "loss": 3.075, "learning_rate": 5.970329033016859e-06, "epoch": 2.038854089978316, "total_flos": 1945925180133150720, "step": 506800 }, { "loss": 3.2, "learning_rate": 5.9695179076209285e-06, "epoch": 2.0392563895225106, "total_flos": 1946307361191014400, "step": 506900 }, { "loss": 3.1225, "learning_rate": 5.968706782224999e-06, "epoch": 2.039658689066705, "total_flos": 1946703675464478720, "step": 507000 }, { "loss": 3.125, "learning_rate": 5.967895656829068e-06, "epoch": 2.0400609886109, "total_flos": 1947084895187496960, "step": 507100 }, { "loss": 3.1975, "learning_rate": 5.967084531433137e-06, "epoch": 2.0404632881550944, "total_flos": 1947470778181201920, "step": 507200 }, { "loss": 3.1425, "learning_rate": 5.966273406037206e-06, "epoch": 2.040865587699289, "total_flos": 1947866008961249280, "step": 507300 }, { "loss": 3.18, "learning_rate": 5.965462280641276e-06, "epoch": 2.041267887243484, "total_flos": 1948254526331105280, "step": 507400 }, { "loss": 3.1675, "learning_rate": 5.964651155245345e-06, "epoch": 2.0416701867876785, "total_flos": 1948633711848345600, "step": 507500 }, { "loss": 3.135, "learning_rate": 5.963840029849415e-06, "epoch": 2.042072486331873, "total_flos": 1949027221785907200, "step": 507600 }, { "loss": 3.1575, "learning_rate": 5.963028904453484e-06, "epoch": 2.0424747858760677, "total_flos": 1949400214394695680, "step": 507700 }, { "loss": 3.1225, "learning_rate": 5.962217779057554e-06, "epoch": 2.0428770854202623, "total_flos": 1949794914050519040, "step": 507800 }, { "loss": 3.0975, "learning_rate": 5.961406653661623e-06, "epoch": 2.043279384964457, "total_flos": 1950177254445649920, "step": 507900 }, { "loss": 3.1325, "learning_rate": 5.960595528265692e-06, "epoch": 2.0436816845086514, "total_flos": 1950548239404871680, "step": 508000 }, { "loss": 3.1925, "learning_rate": 5.959784402869762e-06, "epoch": 2.044083984052846, "total_flos": 1950932550270873600, "step": 508100 }, { "loss": 3.1125, "learning_rate": 5.9589732774738315e-06, "epoch": 2.0444862835970405, "total_flos": 1951320568383959040, "step": 508200 }, { "loss": 3.1375, "learning_rate": 5.958162152077901e-06, "epoch": 2.044888583141235, "total_flos": 1951713563131023360, "step": 508300 }, { "loss": 3.16, "learning_rate": 5.95735102668197e-06, "epoch": 2.04529088268543, "total_flos": 1952101830872494080, "step": 508400 }, { "loss": 3.1425, "learning_rate": 5.95653990128604e-06, "epoch": 2.0456931822296247, "total_flos": 1952491665430425600, "step": 508500 }, { "loss": 3.12, "learning_rate": 5.955728775890109e-06, "epoch": 2.0460954817738193, "total_flos": 1952884261834321920, "step": 508600 }, { "loss": 3.1275, "learning_rate": 5.954917650494178e-06, "epoch": 2.046497781318014, "total_flos": 1953256787053793280, "step": 508700 }, { "loss": 3.11, "learning_rate": 5.9541065250982475e-06, "epoch": 2.0469000808622084, "total_flos": 1953635377711902720, "step": 508800 }, { "loss": 3.1725, "learning_rate": 5.9532953997023175e-06, "epoch": 2.047302380406403, "total_flos": 1954016528388771840, "step": 508900 }, { "loss": 3.145, "learning_rate": 5.952484274306387e-06, "epoch": 2.0477046799505976, "total_flos": 1954387295587061760, "step": 509000 }, { "loss": 3.145, "learning_rate": 5.951673148910456e-06, "epoch": 2.048106979494792, "total_flos": 1954786546977484800, "step": 509100 }, { "loss": 3.17, "learning_rate": 5.950862023514525e-06, "epoch": 2.0485092790389867, "total_flos": 1955184417444925440, "step": 509200 }, { "loss": 3.1275, "learning_rate": 5.950050898118595e-06, "epoch": 2.0489115785831813, "total_flos": 1955573742123601920, "step": 509300 }, { "loss": 3.1775, "learning_rate": 5.949239772722664e-06, "epoch": 2.049313878127376, "total_flos": 1955951621075251200, "step": 509400 }, { "loss": 3.1375, "learning_rate": 5.9484286473267336e-06, "epoch": 2.049716177671571, "total_flos": 1956341216627281920, "step": 509500 }, { "loss": 3.145, "learning_rate": 5.947617521930803e-06, "epoch": 2.0501184772157655, "total_flos": 1956719207115018240, "step": 509600 }, { "loss": 3.055, "learning_rate": 5.946806396534874e-06, "epoch": 2.05052077675996, "total_flos": 1957100272812011520, "step": 509700 }, { "loss": 3.145, "learning_rate": 5.945995271138942e-06, "epoch": 2.0509230763041546, "total_flos": 1957469430703902720, "step": 509800 }, { "loss": 3.1475, "learning_rate": 5.945184145743011e-06, "epoch": 2.051325375848349, "total_flos": 1957864666795192320, "step": 509900 }, { "loss": 3.1575, "learning_rate": 5.94437302034708e-06, "epoch": 2.0517276753925437, "total_flos": 1958246592913428480, "step": 510000 }, { "loss": 3.12, "learning_rate": 5.943561894951151e-06, "epoch": 2.0521299749367383, "total_flos": 1958638743172976640, "step": 510100 }, { "loss": 3.1725, "learning_rate": 5.94275076955522e-06, "epoch": 2.052532274480933, "total_flos": 1959031477669171200, "step": 510200 }, { "loss": 3.14, "learning_rate": 5.941939644159289e-06, "epoch": 2.0529345740251275, "total_flos": 1959405532526407680, "step": 510300 }, { "loss": 3.14, "learning_rate": 5.941128518763358e-06, "epoch": 2.053336873569322, "total_flos": 1959793242587443200, "step": 510400 }, { "loss": 3.1075, "learning_rate": 5.940317393367429e-06, "epoch": 2.053739173113517, "total_flos": 1960161571925544960, "step": 510500 }, { "loss": 3.1725, "learning_rate": 5.939506267971497e-06, "epoch": 2.0541414726577116, "total_flos": 1960538733859491840, "step": 510600 }, { "loss": 3.1825, "learning_rate": 5.9386951425755665e-06, "epoch": 2.054543772201906, "total_flos": 1960915497450270720, "step": 510700 }, { "loss": 3.17, "learning_rate": 5.937884017179636e-06, "epoch": 2.0549460717461008, "total_flos": 1961299160344719360, "step": 510800 }, { "loss": 3.1575, "learning_rate": 5.9370728917837065e-06, "epoch": 2.0553483712902954, "total_flos": 1961682796682956800, "step": 510900 }, { "loss": 3.15, "learning_rate": 5.936261766387775e-06, "epoch": 2.05575067083449, "total_flos": 1962065025542000640, "step": 511000 }, { "loss": 3.15, "learning_rate": 5.935450640991844e-06, "epoch": 2.0561529703786845, "total_flos": 1962452677179371520, "step": 511100 }, { "loss": 3.1575, "learning_rate": 5.934639515595913e-06, "epoch": 2.056555269922879, "total_flos": 1962829244254187520, "step": 511200 }, { "loss": 3.1775, "learning_rate": 5.933828390199984e-06, "epoch": 2.0569575694670736, "total_flos": 1963217825358950400, "step": 511300 }, { "loss": 3.1525, "learning_rate": 5.9330172648040525e-06, "epoch": 2.057359869011268, "total_flos": 1963605901895700480, "step": 511400 }, { "loss": 3.0975, "learning_rate": 5.932206139408122e-06, "epoch": 2.0577621685554632, "total_flos": 1964000251009536000, "step": 511500 }, { "loss": 3.13, "learning_rate": 5.931395014012191e-06, "epoch": 2.058164468099658, "total_flos": 1964390696360325120, "step": 511600 }, { "loss": 3.15, "learning_rate": 5.930583888616262e-06, "epoch": 2.0585667676438524, "total_flos": 1964770567027814400, "step": 511700 }, { "loss": 3.1025, "learning_rate": 5.92977276322033e-06, "epoch": 2.058969067188047, "total_flos": 1965151505254993920, "step": 511800 }, { "loss": 3.1425, "learning_rate": 5.928961637824399e-06, "epoch": 2.0593713667322415, "total_flos": 1965559047494553600, "step": 511900 }, { "loss": 3.18, "learning_rate": 5.9281505124284685e-06, "epoch": 2.059773666276436, "total_flos": 1965942227065958400, "step": 512000 }, { "loss": 3.115, "learning_rate": 5.9273393870325394e-06, "epoch": 2.0601759658206307, "total_flos": 1966329384757800960, "step": 512100 }, { "loss": 3.165, "learning_rate": 5.926528261636609e-06, "epoch": 2.0605782653648252, "total_flos": 1966698808211804160, "step": 512200 }, { "loss": 3.1075, "learning_rate": 5.925717136240677e-06, "epoch": 2.06098056490902, "total_flos": 1967074451130470400, "step": 512300 }, { "loss": 3.185, "learning_rate": 5.924906010844746e-06, "epoch": 2.0613828644532144, "total_flos": 1967467865465671680, "step": 512400 }, { "loss": 3.2, "learning_rate": 5.924094885448817e-06, "epoch": 2.0617851639974094, "total_flos": 1967839068185825280, "step": 512500 }, { "loss": 3.1825, "learning_rate": 5.923283760052886e-06, "epoch": 2.062187463541604, "total_flos": 1968225397323878400, "step": 512600 }, { "loss": 3.1775, "learning_rate": 5.922472634656955e-06, "epoch": 2.0625897630857986, "total_flos": 1968597965033287680, "step": 512700 }, { "loss": 3.0625, "learning_rate": 5.9216615092610255e-06, "epoch": 2.062992062629993, "total_flos": 1968974643644190720, "step": 512800 }, { "loss": 3.1775, "learning_rate": 5.920850383865095e-06, "epoch": 2.0633943621741877, "total_flos": 1969356373246464000, "step": 512900 }, { "loss": 3.1375, "learning_rate": 5.920039258469164e-06, "epoch": 2.0637966617183823, "total_flos": 1969721839824998400, "step": 513000 }, { "loss": 3.1375, "learning_rate": 5.919228133073232e-06, "epoch": 2.064198961262577, "total_flos": 1970114436228894720, "step": 513100 }, { "loss": 3.115, "learning_rate": 5.918417007677303e-06, "epoch": 2.0646012608067714, "total_flos": 1970484895375134720, "step": 513200 }, { "loss": 3.115, "learning_rate": 5.917605882281372e-06, "epoch": 2.065003560350966, "total_flos": 1970861446516224000, "step": 513300 }, { "loss": 3.155, "learning_rate": 5.9167947568854415e-06, "epoch": 2.0654058598951606, "total_flos": 1971255179525959680, "step": 513400 }, { "loss": 3.175, "learning_rate": 5.91598363148951e-06, "epoch": 2.065808159439355, "total_flos": 1971651557534330880, "step": 513500 }, { "loss": 3.155, "learning_rate": 5.915172506093581e-06, "epoch": 2.06621045898355, "total_flos": 1972024390805852160, "step": 513600 }, { "loss": 3.12, "learning_rate": 5.91436138069765e-06, "epoch": 2.0666127585277447, "total_flos": 1972400681696071680, "step": 513700 }, { "loss": 3.1425, "learning_rate": 5.913550255301719e-06, "epoch": 2.0670150580719393, "total_flos": 1972779107705671680, "step": 513800 }, { "loss": 3.1625, "learning_rate": 5.9127391299057875e-06, "epoch": 2.067417357616134, "total_flos": 1973143751041658880, "step": 513900 }, { "loss": 3.1375, "learning_rate": 5.911928004509858e-06, "epoch": 2.0678196571603285, "total_flos": 1973514082718085120, "step": 514000 }, { "loss": 3.11, "learning_rate": 5.911116879113928e-06, "epoch": 2.068221956704523, "total_flos": 1973895748585451520, "step": 514100 }, { "loss": 3.14, "learning_rate": 5.910305753717997e-06, "epoch": 2.0686242562487176, "total_flos": 1974262824470384640, "step": 514200 }, { "loss": 3.1725, "learning_rate": 5.909494628322065e-06, "epoch": 2.069026555792912, "total_flos": 1974648739331543040, "step": 514300 }, { "loss": 3.1025, "learning_rate": 5.908683502926136e-06, "epoch": 2.0694288553371067, "total_flos": 1975022751698841600, "step": 514400 }, { "loss": 3.12, "learning_rate": 5.907872377530205e-06, "epoch": 2.0698311548813013, "total_flos": 1975393577320796160, "step": 514500 }, { "loss": 3.1, "learning_rate": 5.907061252134274e-06, "epoch": 2.0702334544254963, "total_flos": 1975781951287111680, "step": 514600 }, { "loss": 3.085, "learning_rate": 5.906250126738344e-06, "epoch": 2.070635753969691, "total_flos": 1976153228364656640, "step": 514700 }, { "loss": 3.1725, "learning_rate": 5.905439001342414e-06, "epoch": 2.0710380535138855, "total_flos": 1976545065260912640, "step": 514800 }, { "loss": 3.1575, "learning_rate": 5.904627875946483e-06, "epoch": 2.07144035305808, "total_flos": 1976929705423933440, "step": 514900 }, { "loss": 3.135, "learning_rate": 5.903816750550552e-06, "epoch": 2.0718426526022746, "total_flos": 1977293222776565760, "step": 515000 }, { "loss": 3.155, "learning_rate": 5.903005625154621e-06, "epoch": 2.072244952146469, "total_flos": 1977668435484610560, "step": 515100 }, { "loss": 3.1375, "learning_rate": 5.902194499758691e-06, "epoch": 2.0726472516906638, "total_flos": 1978059018927697920, "step": 515200 }, { "loss": 3.1425, "learning_rate": 5.9013833743627605e-06, "epoch": 2.0730495512348583, "total_flos": 1978440467034132480, "step": 515300 }, { "loss": 3.1325, "learning_rate": 5.90057224896683e-06, "epoch": 2.073451850779053, "total_flos": 1978820210231808000, "step": 515400 }, { "loss": 3.105, "learning_rate": 5.899761123570899e-06, "epoch": 2.0738541503232475, "total_flos": 1979199082385756160, "step": 515500 }, { "loss": 3.1225, "learning_rate": 5.898949998174969e-06, "epoch": 2.0742564498674425, "total_flos": 1979576350544547840, "step": 515600 }, { "loss": 3.1375, "learning_rate": 5.898138872779038e-06, "epoch": 2.074658749411637, "total_flos": 1979951664166195200, "step": 515700 }, { "loss": 3.175, "learning_rate": 5.897327747383107e-06, "epoch": 2.0750610489558317, "total_flos": 1980334381659525120, "step": 515800 }, { "loss": 3.145, "learning_rate": 5.8965166219871765e-06, "epoch": 2.0754633485000262, "total_flos": 1980725905192488960, "step": 515900 }, { "loss": 3.1425, "learning_rate": 5.8957054965912465e-06, "epoch": 2.075865648044221, "total_flos": 1981105717436313600, "step": 516000 }, { "loss": 3.1175, "learning_rate": 5.894894371195316e-06, "epoch": 2.0762679475884154, "total_flos": 1981485200383119360, "step": 516100 }, { "loss": 3.1375, "learning_rate": 5.894083245799385e-06, "epoch": 2.07667024713261, "total_flos": 1981866021762969600, "step": 516200 }, { "loss": 3.175, "learning_rate": 5.893272120403454e-06, "epoch": 2.0770725466768045, "total_flos": 1982250810640773120, "step": 516300 }, { "loss": 3.13, "learning_rate": 5.892460995007524e-06, "epoch": 2.077474846220999, "total_flos": 1982627457384222720, "step": 516400 }, { "loss": 3.1325, "learning_rate": 5.891649869611593e-06, "epoch": 2.0778771457651937, "total_flos": 1982993210769838080, "step": 516500 }, { "loss": 3.1675, "learning_rate": 5.8908387442156626e-06, "epoch": 2.0782794453093887, "total_flos": 1983381212949196800, "step": 516600 }, { "loss": 3.1275, "learning_rate": 5.890027618819732e-06, "epoch": 2.0786817448535833, "total_flos": 1983750285861212160, "step": 516700 }, { "loss": 3.1025, "learning_rate": 5.889216493423802e-06, "epoch": 2.079084044397778, "total_flos": 1984130496448204800, "step": 516800 }, { "loss": 3.0525, "learning_rate": 5.888405368027871e-06, "epoch": 2.0794863439419724, "total_flos": 1984516820275015680, "step": 516900 }, { "loss": 3.1675, "learning_rate": 5.88759424263194e-06, "epoch": 2.079888643486167, "total_flos": 1984887964571504640, "step": 517000 }, { "loss": 3.08, "learning_rate": 5.886783117236009e-06, "epoch": 2.0802909430303616, "total_flos": 1985290668269383680, "step": 517100 }, { "loss": 3.0775, "learning_rate": 5.8859719918400794e-06, "epoch": 2.080693242574556, "total_flos": 1985665392343142400, "step": 517200 }, { "loss": 3.1625, "learning_rate": 5.885160866444149e-06, "epoch": 2.0810955421187507, "total_flos": 1986043133202493440, "step": 517300 }, { "loss": 3.1425, "learning_rate": 5.884349741048218e-06, "epoch": 2.0814978416629453, "total_flos": 1986443021941985280, "step": 517400 }, { "loss": 3.11, "learning_rate": 5.883538615652288e-06, "epoch": 2.08190014120714, "total_flos": 1986826456453017600, "step": 517500 }, { "loss": 3.1525, "learning_rate": 5.882727490256357e-06, "epoch": 2.0823024407513344, "total_flos": 1987217517907906560, "step": 517600 }, { "loss": 3.1025, "learning_rate": 5.881916364860426e-06, "epoch": 2.0827047402955294, "total_flos": 1987590414914334720, "step": 517700 }, { "loss": 3.14, "learning_rate": 5.8811052394644955e-06, "epoch": 2.083107039839724, "total_flos": 1987979426229719040, "step": 517800 }, { "loss": 3.1525, "learning_rate": 5.8802941140685655e-06, "epoch": 2.0835093393839186, "total_flos": 1988357167089070080, "step": 517900 }, { "loss": 3.11, "learning_rate": 5.879482988672635e-06, "epoch": 2.083911638928113, "total_flos": 1988744383204577280, "step": 518000 }, { "loss": 3.195, "learning_rate": 5.878671863276704e-06, "epoch": 2.0843139384723077, "total_flos": 1989133824730583040, "step": 518100 }, { "loss": 3.145, "learning_rate": 5.877860737880773e-06, "epoch": 2.0847162380165023, "total_flos": 1989524025764229120, "step": 518200 }, { "loss": 3.165, "learning_rate": 5.877049612484843e-06, "epoch": 2.085118537560697, "total_flos": 1989914996928000000, "step": 518300 }, { "loss": 3.0925, "learning_rate": 5.876238487088912e-06, "epoch": 2.0855208371048914, "total_flos": 1990298755424808960, "step": 518400 }, { "loss": 3.1275, "learning_rate": 5.8754273616929815e-06, "epoch": 2.085923136649086, "total_flos": 1990678291484037120, "step": 518500 }, { "loss": 3.1525, "learning_rate": 5.874616236297051e-06, "epoch": 2.0863254361932806, "total_flos": 1991066261795942400, "step": 518600 }, { "loss": 3.125, "learning_rate": 5.873805110901121e-06, "epoch": 2.0867277357374756, "total_flos": 1991464137574625280, "step": 518700 }, { "loss": 3.0775, "learning_rate": 5.87299398550519e-06, "epoch": 2.08713003528167, "total_flos": 1991849298239385600, "step": 518800 }, { "loss": 3.125, "learning_rate": 5.872182860109259e-06, "epoch": 2.0875323348258648, "total_flos": 1992228218194513920, "step": 518900 }, { "loss": 3.065, "learning_rate": 5.871371734713328e-06, "epoch": 2.0879346343700593, "total_flos": 1992618822882570240, "step": 519000 }, { "loss": 3.115, "learning_rate": 5.870560609317398e-06, "epoch": 2.088336933914254, "total_flos": 1993012948924231680, "step": 519100 }, { "loss": 3.11, "learning_rate": 5.869749483921468e-06, "epoch": 2.0887392334584485, "total_flos": 1993388283790848000, "step": 519200 }, { "loss": 3.1825, "learning_rate": 5.868938358525537e-06, "epoch": 2.089141533002643, "total_flos": 1993772955821322240, "step": 519300 }, { "loss": 3.1025, "learning_rate": 5.868127233129606e-06, "epoch": 2.0895438325468376, "total_flos": 1994156868344156160, "step": 519400 }, { "loss": 3.0975, "learning_rate": 5.867316107733676e-06, "epoch": 2.089946132091032, "total_flos": 1994549820601282560, "step": 519500 }, { "loss": 3.1775, "learning_rate": 5.866504982337745e-06, "epoch": 2.0903484316352268, "total_flos": 1994938619466977280, "step": 519600 }, { "loss": 3.12, "learning_rate": 5.865693856941814e-06, "epoch": 2.090750731179422, "total_flos": 1995317980255211520, "step": 519700 }, { "loss": 3.1075, "learning_rate": 5.864882731545884e-06, "epoch": 2.0911530307236164, "total_flos": 1995710358898176000, "step": 519800 }, { "loss": 3.14, "learning_rate": 5.864071606149954e-06, "epoch": 2.091555330267811, "total_flos": 1996095264623308800, "step": 519900 }, { "loss": 3.1275, "learning_rate": 5.863260480754023e-06, "epoch": 2.0919576298120055, "total_flos": 1996461825317744640, "step": 520000 }, { "loss": 3.1225, "learning_rate": 5.862449355358092e-06, "epoch": 2.0923599293562, "total_flos": 1996835125978583040, "step": 520100 }, { "loss": 3.1375, "learning_rate": 5.861638229962161e-06, "epoch": 2.0927622289003946, "total_flos": 1997225311078502400, "step": 520200 }, { "loss": 3.135, "learning_rate": 5.860827104566231e-06, "epoch": 2.0931645284445892, "total_flos": 1997616553115627520, "step": 520300 }, { "loss": 3.1675, "learning_rate": 5.8600159791703005e-06, "epoch": 2.093566827988784, "total_flos": 1998007832331448320, "step": 520400 }, { "loss": 3.09, "learning_rate": 5.85920485377437e-06, "epoch": 2.0939691275329784, "total_flos": 1998394512011489280, "step": 520500 }, { "loss": 3.12, "learning_rate": 5.858393728378439e-06, "epoch": 2.094371427077173, "total_flos": 1998786869409484800, "step": 520600 }, { "loss": 3.1175, "learning_rate": 5.857582602982509e-06, "epoch": 2.0947737266213675, "total_flos": 1999169321340702720, "step": 520700 }, { "loss": 3.06, "learning_rate": 5.856771477586578e-06, "epoch": 2.0951760261655625, "total_flos": 1999557116381614080, "step": 520800 }, { "loss": 3.175, "learning_rate": 5.855960352190647e-06, "epoch": 2.095578325709757, "total_flos": 1999949840255324160, "step": 520900 }, { "loss": 3.1075, "learning_rate": 5.8551492267947165e-06, "epoch": 2.0959806252539517, "total_flos": 2000337799944744960, "step": 521000 }, { "loss": 3.1375, "learning_rate": 5.8543381013987865e-06, "epoch": 2.0963829247981463, "total_flos": 2000725148841308160, "step": 521100 }, { "loss": 3.145, "learning_rate": 5.853526976002856e-06, "epoch": 2.096785224342341, "total_flos": 2001111345198305280, "step": 521200 }, { "loss": 3.1775, "learning_rate": 5.852715850606925e-06, "epoch": 2.0971875238865354, "total_flos": 2001509826458603520, "step": 521300 }, { "loss": 3.1025, "learning_rate": 5.851904725210994e-06, "epoch": 2.09758982343073, "total_flos": 2001894620647649280, "step": 521400 }, { "loss": 3.0925, "learning_rate": 5.851093599815064e-06, "epoch": 2.0979921229749245, "total_flos": 2002269774932029440, "step": 521500 }, { "loss": 3.1175, "learning_rate": 5.850282474419133e-06, "epoch": 2.098394422519119, "total_flos": 2002655424231075840, "step": 521600 }, { "loss": 3.0675, "learning_rate": 5.8494713490232026e-06, "epoch": 2.0987967220633137, "total_flos": 2003018394525757440, "step": 521700 }, { "loss": 3.155, "learning_rate": 5.848660223627272e-06, "epoch": 2.0991990216075087, "total_flos": 2003417491890155520, "step": 521800 }, { "loss": 3.18, "learning_rate": 5.847849098231342e-06, "epoch": 2.0996013211517033, "total_flos": 2003801723087523840, "step": 521900 }, { "loss": 3.12, "learning_rate": 5.847037972835411e-06, "epoch": 2.100003620695898, "total_flos": 2004186543832780800, "step": 522000 }, { "loss": 3.0825, "learning_rate": 5.84622684743948e-06, "epoch": 2.1004059202400924, "total_flos": 2004576171252264960, "step": 522100 }, { "loss": 3.1775, "learning_rate": 5.84541572204355e-06, "epoch": 2.100808219784287, "total_flos": 2004971619793244160, "step": 522200 }, { "loss": 3.135, "learning_rate": 5.8446045966476194e-06, "epoch": 2.1012105193284816, "total_flos": 2005345478134517760, "step": 522300 }, { "loss": 3.165, "learning_rate": 5.843793471251689e-06, "epoch": 2.101612818872676, "total_flos": 2005722050520576000, "step": 522400 }, { "loss": 3.1625, "learning_rate": 5.842982345855758e-06, "epoch": 2.1020151184168707, "total_flos": 2006116027847454720, "step": 522500 }, { "loss": 3.11, "learning_rate": 5.842171220459828e-06, "epoch": 2.1024174179610653, "total_flos": 2006493046377861120, "step": 522600 }, { "loss": 3.12, "learning_rate": 5.841360095063897e-06, "epoch": 2.10281971750526, "total_flos": 2006876236571750400, "step": 522700 }, { "loss": 3.1325, "learning_rate": 5.840548969667966e-06, "epoch": 2.103222017049455, "total_flos": 2007274675342110720, "step": 522800 }, { "loss": 3.11, "learning_rate": 5.8397378442720355e-06, "epoch": 2.1036243165936495, "total_flos": 2007650987477299200, "step": 522900 }, { "loss": 3.1225, "learning_rate": 5.8389267188761055e-06, "epoch": 2.104026616137844, "total_flos": 2008031676076093440, "step": 523000 }, { "loss": 3.0825, "learning_rate": 5.838115593480175e-06, "epoch": 2.1044289156820386, "total_flos": 2008425722449121280, "step": 523100 }, { "loss": 3.12, "learning_rate": 5.837304468084244e-06, "epoch": 2.104831215226233, "total_flos": 2008806336690524160, "step": 523200 }, { "loss": 3.135, "learning_rate": 5.836493342688313e-06, "epoch": 2.1052335147704277, "total_flos": 2009193234131496960, "step": 523300 }, { "loss": 3.1175, "learning_rate": 5.835682217292383e-06, "epoch": 2.1056358143146223, "total_flos": 2009581385025638400, "step": 523400 }, { "loss": 3.155, "learning_rate": 5.834871091896452e-06, "epoch": 2.106038113858817, "total_flos": 2009946001805414400, "step": 523500 }, { "loss": 3.13, "learning_rate": 5.8340599665005215e-06, "epoch": 2.1064404134030115, "total_flos": 2010341630928629760, "step": 523600 }, { "loss": 3.15, "learning_rate": 5.833248841104591e-06, "epoch": 2.106842712947206, "total_flos": 2010737541547683840, "step": 523700 }, { "loss": 3.175, "learning_rate": 5.832437715708661e-06, "epoch": 2.107245012491401, "total_flos": 2011112531183554560, "step": 523800 }, { "loss": 3.1625, "learning_rate": 5.83162659031273e-06, "epoch": 2.1076473120355956, "total_flos": 2011477641908858880, "step": 523900 }, { "loss": 3.065, "learning_rate": 5.830815464916799e-06, "epoch": 2.10804961157979, "total_flos": 2011852647478456320, "step": 524000 }, { "loss": 3.1775, "learning_rate": 5.830004339520868e-06, "epoch": 2.108451911123985, "total_flos": 2012230000617123840, "step": 524100 }, { "loss": 3.1225, "learning_rate": 5.829193214124938e-06, "epoch": 2.1088542106681794, "total_flos": 2012619452765614080, "step": 524200 }, { "loss": 3.105, "learning_rate": 5.828382088729008e-06, "epoch": 2.109256510212374, "total_flos": 2013011353396776960, "step": 524300 }, { "loss": 3.155, "learning_rate": 5.827570963333077e-06, "epoch": 2.1096588097565685, "total_flos": 2013391298421657600, "step": 524400 }, { "loss": 3.0675, "learning_rate": 5.826759837937146e-06, "epoch": 2.110061109300763, "total_flos": 2013781531322757120, "step": 524500 }, { "loss": 3.12, "learning_rate": 5.825948712541216e-06, "epoch": 2.1104634088449576, "total_flos": 2014159824551301120, "step": 524600 }, { "loss": 3.1525, "learning_rate": 5.825137587145285e-06, "epoch": 2.110865708389152, "total_flos": 2014545606631403520, "step": 524700 }, { "loss": 3.075, "learning_rate": 5.824326461749354e-06, "epoch": 2.1112680079333472, "total_flos": 2014912231060746240, "step": 524800 }, { "loss": 3.1425, "learning_rate": 5.823515336353424e-06, "epoch": 2.111670307477542, "total_flos": 2015299633069731840, "step": 524900 }, { "loss": 3.1475, "learning_rate": 5.822704210957494e-06, "epoch": 2.1120726070217364, "total_flos": 2015669911633735680, "step": 525000 }, { "loss": 3.14, "learning_rate": 5.821893085561563e-06, "epoch": 2.112474906565931, "total_flos": 2016064382906142720, "step": 525100 }, { "loss": 3.1475, "learning_rate": 5.821081960165632e-06, "epoch": 2.1128772061101255, "total_flos": 2016434826118656000, "step": 525200 }, { "loss": 3.105, "learning_rate": 5.820270834769701e-06, "epoch": 2.11327950565432, "total_flos": 2016806931749990400, "step": 525300 }, { "loss": 3.05, "learning_rate": 5.819459709373771e-06, "epoch": 2.1136818051985147, "total_flos": 2017201509247242240, "step": 525400 }, { "loss": 3.1075, "learning_rate": 5.8186485839778405e-06, "epoch": 2.1140841047427092, "total_flos": 2017584646328709120, "step": 525500 }, { "loss": 3.1575, "learning_rate": 5.81783745858191e-06, "epoch": 2.114486404286904, "total_flos": 2017960034307747840, "step": 525600 }, { "loss": 3.14, "learning_rate": 5.817026333185979e-06, "epoch": 2.1148887038310984, "total_flos": 2018348025864622080, "step": 525700 }, { "loss": 3.1325, "learning_rate": 5.816215207790049e-06, "epoch": 2.115291003375293, "total_flos": 2018744228601999360, "step": 525800 }, { "loss": 3.16, "learning_rate": 5.815404082394118e-06, "epoch": 2.115693302919488, "total_flos": 2019124986246942720, "step": 525900 }, { "loss": 3.055, "learning_rate": 5.814592956998187e-06, "epoch": 2.1160956024636826, "total_flos": 2019509668899901440, "step": 526000 }, { "loss": 3.1325, "learning_rate": 5.8137818316022565e-06, "epoch": 2.116497902007877, "total_flos": 2019890500902236160, "step": 526100 }, { "loss": 3.0925, "learning_rate": 5.8129707062063265e-06, "epoch": 2.1169002015520717, "total_flos": 2020272952833454080, "step": 526200 }, { "loss": 3.105, "learning_rate": 5.812159580810396e-06, "epoch": 2.1173025010962663, "total_flos": 2020646051667087360, "step": 526300 }, { "loss": 3.095, "learning_rate": 5.811348455414465e-06, "epoch": 2.117704800640461, "total_flos": 2021024573279047680, "step": 526400 }, { "loss": 3.1275, "learning_rate": 5.810537330018534e-06, "epoch": 2.1181071001846554, "total_flos": 2021400598607155200, "step": 526500 }, { "loss": 3.11, "learning_rate": 5.809726204622604e-06, "epoch": 2.11850939972885, "total_flos": 2021796312710246400, "step": 526600 }, { "loss": 3.11, "learning_rate": 5.808915079226673e-06, "epoch": 2.1189116992730446, "total_flos": 2022172768248975360, "step": 526700 }, { "loss": 3.1125, "learning_rate": 5.8081039538307426e-06, "epoch": 2.119313998817239, "total_flos": 2022569661447843840, "step": 526800 }, { "loss": 3.1375, "learning_rate": 5.807292828434813e-06, "epoch": 2.119716298361434, "total_flos": 2022973491129077760, "step": 526900 }, { "loss": 3.095, "learning_rate": 5.806481703038882e-06, "epoch": 2.1201185979056287, "total_flos": 2023364695987507200, "step": 527000 }, { "loss": 3.14, "learning_rate": 5.805670577642951e-06, "epoch": 2.1205208974498233, "total_flos": 2023756012382023680, "step": 527100 }, { "loss": 3.135, "learning_rate": 5.80485945224702e-06, "epoch": 2.120923196994018, "total_flos": 2024117522085089280, "step": 527200 }, { "loss": 3.13, "learning_rate": 5.80404832685109e-06, "epoch": 2.1213254965382125, "total_flos": 2024500568875438080, "step": 527300 }, { "loss": 3.0875, "learning_rate": 5.8032372014551594e-06, "epoch": 2.121727796082407, "total_flos": 2024881682373611520, "step": 527400 }, { "loss": 3.135, "learning_rate": 5.802426076059229e-06, "epoch": 2.1221300956266016, "total_flos": 2025263773140357120, "step": 527500 }, { "loss": 3.1725, "learning_rate": 5.801614950663298e-06, "epoch": 2.122532395170796, "total_flos": 2025653214666362880, "step": 527600 }, { "loss": 3.13, "learning_rate": 5.800803825267368e-06, "epoch": 2.1229346947149907, "total_flos": 2026039697830440960, "step": 527700 }, { "loss": 3.0725, "learning_rate": 5.799992699871437e-06, "epoch": 2.1233369942591853, "total_flos": 2026420954732154880, "step": 527800 }, { "loss": 3.0825, "learning_rate": 5.799181574475506e-06, "epoch": 2.1237392938033803, "total_flos": 2026799131113369600, "step": 527900 }, { "loss": 3.1325, "learning_rate": 5.7983704490795755e-06, "epoch": 2.124141593347575, "total_flos": 2027177015376261120, "step": 528000 }, { "loss": 3.1825, "learning_rate": 5.7975593236836455e-06, "epoch": 2.1245438928917695, "total_flos": 2027572054951587840, "step": 528100 }, { "loss": 3.09, "learning_rate": 5.796748198287715e-06, "epoch": 2.124946192435964, "total_flos": 2027976118327480320, "step": 528200 }, { "loss": 3.11, "learning_rate": 5.795937072891784e-06, "epoch": 2.1253484919801586, "total_flos": 2028374286224486400, "step": 528300 }, { "loss": 3.1225, "learning_rate": 5.795125947495853e-06, "epoch": 2.125750791524353, "total_flos": 2028758177502351360, "step": 528400 }, { "loss": 3.1275, "learning_rate": 5.794314822099923e-06, "epoch": 2.1261530910685478, "total_flos": 2029134797689589760, "step": 528500 }, { "loss": 3.0675, "learning_rate": 5.793503696703992e-06, "epoch": 2.1265553906127423, "total_flos": 2029518009128448000, "step": 528600 }, { "loss": 3.1075, "learning_rate": 5.7926925713080615e-06, "epoch": 2.126957690156937, "total_flos": 2029899420056186880, "step": 528700 }, { "loss": 3.1475, "learning_rate": 5.791881445912131e-06, "epoch": 2.1273599897011315, "total_flos": 2030271106099384320, "step": 528800 }, { "loss": 3.1175, "learning_rate": 5.791070320516201e-06, "epoch": 2.127762289245326, "total_flos": 2030663548477255680, "step": 528900 }, { "loss": 3.1925, "learning_rate": 5.79025919512027e-06, "epoch": 2.128164588789521, "total_flos": 2031054997652828160, "step": 529000 }, { "loss": 3.1475, "learning_rate": 5.789448069724339e-06, "epoch": 2.1285668883337157, "total_flos": 2031430815842488320, "step": 529100 }, { "loss": 3.125, "learning_rate": 5.788636944328408e-06, "epoch": 2.1289691878779102, "total_flos": 2031813533335818240, "step": 529200 }, { "loss": 3.1175, "learning_rate": 5.787825818932478e-06, "epoch": 2.129371487422105, "total_flos": 2032191178592808960, "step": 529300 }, { "loss": 3.125, "learning_rate": 5.787014693536548e-06, "epoch": 2.1297737869662994, "total_flos": 2032572632010485760, "step": 529400 }, { "loss": 3.1, "learning_rate": 5.786203568140617e-06, "epoch": 2.130176086510494, "total_flos": 2032941258778152960, "step": 529500 }, { "loss": 3.1375, "learning_rate": 5.785392442744686e-06, "epoch": 2.1305783860546885, "total_flos": 2033333663977328640, "step": 529600 }, { "loss": 3.1325, "learning_rate": 5.784581317348756e-06, "epoch": 2.130980685598883, "total_flos": 2033719940002959360, "step": 529700 }, { "loss": 3.1075, "learning_rate": 5.783770191952825e-06, "epoch": 2.1313829851430777, "total_flos": 2034093734609326080, "step": 529800 }, { "loss": 3.14, "learning_rate": 5.782959066556894e-06, "epoch": 2.1317852846872722, "total_flos": 2034467954115072000, "step": 529900 }, { "loss": 3.115, "learning_rate": 5.782147941160964e-06, "epoch": 2.1321875842314673, "total_flos": 2034843910397030400, "step": 530000 }, { "loss": 3.1, "learning_rate": 5.781336815765034e-06, "epoch": 2.132589883775662, "total_flos": 2035223212761600000, "step": 530100 }, { "loss": 3.1225, "learning_rate": 5.780525690369103e-06, "epoch": 2.1329921833198564, "total_flos": 2035620084715499520, "step": 530200 }, { "loss": 3.16, "learning_rate": 5.779714564973172e-06, "epoch": 2.133394482864051, "total_flos": 2035996710213980160, "step": 530300 }, { "loss": 3.1375, "learning_rate": 5.778903439577241e-06, "epoch": 2.1337967824082456, "total_flos": 2036377531593830400, "step": 530400 }, { "loss": 3.1125, "learning_rate": 5.778092314181311e-06, "epoch": 2.13419908195244, "total_flos": 2036766728802693120, "step": 530500 }, { "loss": 3.0575, "learning_rate": 5.7772811887853805e-06, "epoch": 2.1346013814966347, "total_flos": 2037147953836953600, "step": 530600 }, { "loss": 3.0925, "learning_rate": 5.77647006338945e-06, "epoch": 2.1350036810408293, "total_flos": 2037514126810705920, "step": 530700 }, { "loss": 3.1875, "learning_rate": 5.775658937993519e-06, "epoch": 2.135405980585024, "total_flos": 2037880161692160000, "step": 530800 }, { "loss": 3.09, "learning_rate": 5.774847812597589e-06, "epoch": 2.1358082801292184, "total_flos": 2038255071659397120, "step": 530900 }, { "loss": 3.08, "learning_rate": 5.774036687201658e-06, "epoch": 2.1362105796734134, "total_flos": 2038640991831797760, "step": 531000 }, { "loss": 3.1375, "learning_rate": 5.773225561805727e-06, "epoch": 2.136612879217608, "total_flos": 2039022636454195200, "step": 531100 }, { "loss": 3.085, "learning_rate": 5.7724144364097965e-06, "epoch": 2.1370151787618026, "total_flos": 2039400807524167680, "step": 531200 }, { "loss": 3.12, "learning_rate": 5.7716033110138665e-06, "epoch": 2.137417478305997, "total_flos": 2039792341679616000, "step": 531300 }, { "loss": 3.145, "learning_rate": 5.770792185617936e-06, "epoch": 2.1378197778501917, "total_flos": 2040190350239354880, "step": 531400 }, { "loss": 3.035, "learning_rate": 5.769981060222005e-06, "epoch": 2.1382220773943863, "total_flos": 2040580731855237120, "step": 531500 }, { "loss": 3.1225, "learning_rate": 5.769169934826076e-06, "epoch": 2.138624376938581, "total_flos": 2040965919076208640, "step": 531600 }, { "loss": 3.0475, "learning_rate": 5.768358809430144e-06, "epoch": 2.1390266764827754, "total_flos": 2041359779555758080, "step": 531700 }, { "loss": 3.1325, "learning_rate": 5.767547684034213e-06, "epoch": 2.13942897602697, "total_flos": 2041751605829529600, "step": 531800 }, { "loss": 3.1425, "learning_rate": 5.7667365586382826e-06, "epoch": 2.1398312755711646, "total_flos": 2042130977240248320, "step": 531900 }, { "loss": 3.16, "learning_rate": 5.7659254332423535e-06, "epoch": 2.140233575115359, "total_flos": 2042497670715740160, "step": 532000 }, { "loss": 3.125, "learning_rate": 5.765114307846423e-06, "epoch": 2.140635874659554, "total_flos": 2042879649946398720, "step": 532100 }, { "loss": 3.1075, "learning_rate": 5.764303182450491e-06, "epoch": 2.1410381742037488, "total_flos": 2043244298593628160, "step": 532200 }, { "loss": 3.1325, "learning_rate": 5.76349205705456e-06, "epoch": 2.1414404737479433, "total_flos": 2043629342411059200, "step": 532300 }, { "loss": 3.0875, "learning_rate": 5.762680931658631e-06, "epoch": 2.141842773292138, "total_flos": 2044017403014082560, "step": 532400 }, { "loss": 3.1175, "learning_rate": 5.7618698062627e-06, "epoch": 2.1422450728363325, "total_flos": 2044401028729835520, "step": 532500 }, { "loss": 3.1375, "learning_rate": 5.761058680866769e-06, "epoch": 2.142647372380527, "total_flos": 2044784670379315200, "step": 532600 }, { "loss": 3.115, "learning_rate": 5.760247555470838e-06, "epoch": 2.1430496719247216, "total_flos": 2045169698263019520, "step": 532700 }, { "loss": 3.1725, "learning_rate": 5.759436430074909e-06, "epoch": 2.143451971468916, "total_flos": 2045558985763000320, "step": 532800 }, { "loss": 3.1375, "learning_rate": 5.758625304678978e-06, "epoch": 2.1438542710131108, "total_flos": 2045947896164782080, "step": 532900 }, { "loss": 3.1325, "learning_rate": 5.757814179283046e-06, "epoch": 2.144256570557306, "total_flos": 2046320729436303360, "step": 533000 }, { "loss": 3.1125, "learning_rate": 5.7570030538871155e-06, "epoch": 2.1446588701015004, "total_flos": 2046693185609625600, "step": 533100 }, { "loss": 3.145, "learning_rate": 5.756191928491186e-06, "epoch": 2.145061169645695, "total_flos": 2047071935605002240, "step": 533200 }, { "loss": 3.15, "learning_rate": 5.7553808030952555e-06, "epoch": 2.1454634691898895, "total_flos": 2047474442786918400, "step": 533300 }, { "loss": 3.055, "learning_rate": 5.754569677699324e-06, "epoch": 2.145865768734084, "total_flos": 2047861621723729920, "step": 533400 }, { "loss": 3.15, "learning_rate": 5.753758552303393e-06, "epoch": 2.1462680682782787, "total_flos": 2048238406559477760, "step": 533500 }, { "loss": 3.1075, "learning_rate": 5.752947426907464e-06, "epoch": 2.1466703678224732, "total_flos": 2048617161866096640, "step": 533600 }, { "loss": 3.1, "learning_rate": 5.752136301511533e-06, "epoch": 2.147072667366668, "total_flos": 2049009460840427520, "step": 533700 }, { "loss": 3.1225, "learning_rate": 5.7513251761156015e-06, "epoch": 2.1474749669108624, "total_flos": 2049391047039160320, "step": 533800 }, { "loss": 3.095, "learning_rate": 5.750514050719671e-06, "epoch": 2.147877266455057, "total_flos": 2049771379784724480, "step": 533900 }, { "loss": 3.0775, "learning_rate": 5.749702925323742e-06, "epoch": 2.1482795659992515, "total_flos": 2050170424036700160, "step": 534000 }, { "loss": 3.06, "learning_rate": 5.748891799927811e-06, "epoch": 2.1486818655434465, "total_flos": 2050543830922383360, "step": 534100 }, { "loss": 3.1375, "learning_rate": 5.748080674531879e-06, "epoch": 2.149084165087641, "total_flos": 2050922480004157440, "step": 534200 }, { "loss": 3.12, "learning_rate": 5.747269549135948e-06, "epoch": 2.1494864646318357, "total_flos": 2051291011169464320, "step": 534300 }, { "loss": 3.0375, "learning_rate": 5.746458423740019e-06, "epoch": 2.1498887641760303, "total_flos": 2051680989130936320, "step": 534400 }, { "loss": 3.13, "learning_rate": 5.7456472983440884e-06, "epoch": 2.150291063720225, "total_flos": 2052064875097559040, "step": 534500 }, { "loss": 3.0525, "learning_rate": 5.744836172948158e-06, "epoch": 2.1506933632644194, "total_flos": 2052435907857960960, "step": 534600 }, { "loss": 3.1325, "learning_rate": 5.744025047552226e-06, "epoch": 2.151095662808614, "total_flos": 2052822454756945920, "step": 534700 }, { "loss": 3.0975, "learning_rate": 5.743213922156297e-06, "epoch": 2.1514979623528085, "total_flos": 2053219794100162560, "step": 534800 }, { "loss": 3.07, "learning_rate": 5.742402796760366e-06, "epoch": 2.151900261897003, "total_flos": 2053600711082373120, "step": 534900 }, { "loss": 3.135, "learning_rate": 5.741591671364435e-06, "epoch": 2.1523025614411977, "total_flos": 2053994624674344960, "step": 535000 }, { "loss": 3.085, "learning_rate": 5.740780545968504e-06, "epoch": 2.1527048609853927, "total_flos": 2054383402295070720, "step": 535100 }, { "loss": 3.1175, "learning_rate": 5.7399694205725745e-06, "epoch": 2.1531071605295873, "total_flos": 2054764186496225280, "step": 535200 }, { "loss": 3.08, "learning_rate": 5.739158295176644e-06, "epoch": 2.153509460073782, "total_flos": 2055142745286881280, "step": 535300 }, { "loss": 3.11, "learning_rate": 5.738347169780713e-06, "epoch": 2.1539117596179764, "total_flos": 2055531188299345920, "step": 535400 }, { "loss": 3.0775, "learning_rate": 5.737536044384781e-06, "epoch": 2.154314059162171, "total_flos": 2055905126309253120, "step": 535500 }, { "loss": 3.065, "learning_rate": 5.736724918988852e-06, "epoch": 2.1547163587063656, "total_flos": 2056281576536739840, "step": 535600 }, { "loss": 3.135, "learning_rate": 5.735913793592921e-06, "epoch": 2.15511865825056, "total_flos": 2056667045253550080, "step": 535700 }, { "loss": 3.1, "learning_rate": 5.7351026681969905e-06, "epoch": 2.1555209577947547, "total_flos": 2057048748299612160, "step": 535800 }, { "loss": 3.1, "learning_rate": 5.734291542801059e-06, "epoch": 2.1559232573389493, "total_flos": 2057431444547973120, "step": 535900 }, { "loss": 3.125, "learning_rate": 5.73348041740513e-06, "epoch": 2.156325556883144, "total_flos": 2057815994419875840, "step": 536000 }, { "loss": 3.12, "learning_rate": 5.732669292009199e-06, "epoch": 2.156727856427339, "total_flos": 2058190872519659520, "step": 536100 }, { "loss": 3.125, "learning_rate": 5.731858166613268e-06, "epoch": 2.1571301559715335, "total_flos": 2058570902524416000, "step": 536200 }, { "loss": 3.1125, "learning_rate": 5.731047041217338e-06, "epoch": 2.157532455515728, "total_flos": 2058962845645516800, "step": 536300 }, { "loss": 3.1025, "learning_rate": 5.730235915821407e-06, "epoch": 2.1579347550599226, "total_flos": 2059332024782376960, "step": 536400 }, { "loss": 3.115, "learning_rate": 5.729424790425477e-06, "epoch": 2.158337054604117, "total_flos": 2059718916912107520, "step": 536500 }, { "loss": 3.1, "learning_rate": 5.728613665029546e-06, "epoch": 2.1587393541483118, "total_flos": 2060116532439920640, "step": 536600 }, { "loss": 3.0775, "learning_rate": 5.727802539633616e-06, "epoch": 2.1591416536925063, "total_flos": 2060507816966983680, "step": 536700 }, { "loss": 3.0325, "learning_rate": 5.726991414237685e-06, "epoch": 2.159543953236701, "total_flos": 2060888489632051200, "step": 536800 }, { "loss": 3.115, "learning_rate": 5.726180288841754e-06, "epoch": 2.1599462527808955, "total_flos": 2061265906505625600, "step": 536900 }, { "loss": 3.09, "learning_rate": 5.725369163445823e-06, "epoch": 2.16034855232509, "total_flos": 2061657095430328320, "step": 537000 }, { "loss": 3.18, "learning_rate": 5.7245580380498935e-06, "epoch": 2.1607508518692846, "total_flos": 2062032350628311040, "step": 537100 }, { "loss": 3.075, "learning_rate": 5.723746912653963e-06, "epoch": 2.1611531514134796, "total_flos": 2062420549323632640, "step": 537200 }, { "loss": 3.0425, "learning_rate": 5.722935787258032e-06, "epoch": 2.161555450957674, "total_flos": 2062807834485288960, "step": 537300 }, { "loss": 3.1075, "learning_rate": 5.722124661862101e-06, "epoch": 2.161957750501869, "total_flos": 2063193605942906880, "step": 537400 }, { "loss": 3.175, "learning_rate": 5.721313536466171e-06, "epoch": 2.1623600500460634, "total_flos": 2063578644449095680, "step": 537500 }, { "loss": 3.1125, "learning_rate": 5.72050241107024e-06, "epoch": 2.162762349590258, "total_flos": 2063963587352924160, "step": 537600 }, { "loss": 3.1175, "learning_rate": 5.7196912856743095e-06, "epoch": 2.1631646491344525, "total_flos": 2064337275734446080, "step": 537700 }, { "loss": 3.1125, "learning_rate": 5.718880160278379e-06, "epoch": 2.163566948678647, "total_flos": 2064733239465922560, "step": 537800 }, { "loss": 3.0725, "learning_rate": 5.718069034882449e-06, "epoch": 2.1639692482228416, "total_flos": 2065100634025390080, "step": 537900 }, { "loss": 3.1275, "learning_rate": 5.717257909486518e-06, "epoch": 2.164371547767036, "total_flos": 2065488599026053120, "step": 538000 }, { "loss": 3.08, "learning_rate": 5.716446784090587e-06, "epoch": 2.164773847311231, "total_flos": 2065865914986024960, "step": 538100 }, { "loss": 3.09, "learning_rate": 5.715635658694656e-06, "epoch": 2.165176146855426, "total_flos": 2066256036351037440, "step": 538200 }, { "loss": 3.11, "learning_rate": 5.714824533298726e-06, "epoch": 2.1655784463996204, "total_flos": 2066636400964055040, "step": 538300 }, { "loss": 3.0375, "learning_rate": 5.7140134079027955e-06, "epoch": 2.165980745943815, "total_flos": 2067012718410485760, "step": 538400 }, { "loss": 3.13, "learning_rate": 5.713202282506865e-06, "epoch": 2.1663830454880095, "total_flos": 2067410620745379840, "step": 538500 }, { "loss": 3.0525, "learning_rate": 5.712391157110934e-06, "epoch": 2.166785345032204, "total_flos": 2067809611884933120, "step": 538600 }, { "loss": 3.1525, "learning_rate": 5.711580031715004e-06, "epoch": 2.1671876445763987, "total_flos": 2068196684596899840, "step": 538700 }, { "loss": 3.1675, "learning_rate": 5.710768906319073e-06, "epoch": 2.1675899441205932, "total_flos": 2068582025843896320, "step": 538800 }, { "loss": 3.0925, "learning_rate": 5.709957780923142e-06, "epoch": 2.167992243664788, "total_flos": 2068968445273067520, "step": 538900 }, { "loss": 3.1325, "learning_rate": 5.7091466555272116e-06, "epoch": 2.1683945432089824, "total_flos": 2069348990468321280, "step": 539000 }, { "loss": 3.08, "learning_rate": 5.708335530131282e-06, "epoch": 2.168796842753177, "total_flos": 2069731532690657280, "step": 539100 }, { "loss": 3.0675, "learning_rate": 5.707524404735351e-06, "epoch": 2.169199142297372, "total_flos": 2070126598822195200, "step": 539200 }, { "loss": 3.0575, "learning_rate": 5.70671327933942e-06, "epoch": 2.1696014418415666, "total_flos": 2070522520063733760, "step": 539300 }, { "loss": 3.1425, "learning_rate": 5.705902153943489e-06, "epoch": 2.170003741385761, "total_flos": 2070903978792652800, "step": 539400 }, { "loss": 3.1675, "learning_rate": 5.705091028547559e-06, "epoch": 2.1704060409299557, "total_flos": 2071279908518400000, "step": 539500 }, { "loss": 3.1275, "learning_rate": 5.7042799031516284e-06, "epoch": 2.1708083404741503, "total_flos": 2071664081292103680, "step": 539600 }, { "loss": 3.125, "learning_rate": 5.703468777755698e-06, "epoch": 2.171210640018345, "total_flos": 2072038173328035840, "step": 539700 }, { "loss": 3.135, "learning_rate": 5.702657652359767e-06, "epoch": 2.1716129395625394, "total_flos": 2072415478665523200, "step": 539800 }, { "loss": 3.0875, "learning_rate": 5.701846526963837e-06, "epoch": 2.172015239106734, "total_flos": 2072805637209231360, "step": 539900 }, { "loss": 3.0625, "learning_rate": 5.701035401567906e-06, "epoch": 2.1724175386509286, "total_flos": 2073186463900323840, "step": 540000 }, { "loss": 3.055, "learning_rate": 5.700224276171975e-06, "epoch": 2.172819838195123, "total_flos": 2073569616915517440, "step": 540100 }, { "loss": 3.1225, "learning_rate": 5.6994131507760445e-06, "epoch": 2.1732221377393177, "total_flos": 2073962871913451520, "step": 540200 }, { "loss": 3.0925, "learning_rate": 5.6986020253801145e-06, "epoch": 2.1736244372835127, "total_flos": 2074358490414182400, "step": 540300 }, { "loss": 3.08, "learning_rate": 5.697790899984184e-06, "epoch": 2.1740267368277073, "total_flos": 2074730760694026240, "step": 540400 }, { "loss": 3.13, "learning_rate": 5.696979774588253e-06, "epoch": 2.174429036371902, "total_flos": 2075116367503134720, "step": 540500 }, { "loss": 3.14, "learning_rate": 5.696168649192322e-06, "epoch": 2.1748313359160965, "total_flos": 2075511794799144960, "step": 540600 }, { "loss": 3.1675, "learning_rate": 5.695357523796392e-06, "epoch": 2.175233635460291, "total_flos": 2075883289637621760, "step": 540700 }, { "loss": 3.1325, "learning_rate": 5.694546398400461e-06, "epoch": 2.1756359350044856, "total_flos": 2076257344494858240, "step": 540800 }, { "loss": 3.0975, "learning_rate": 5.6937352730045305e-06, "epoch": 2.17603823454868, "total_flos": 2076651003147202560, "step": 540900 }, { "loss": 3.1325, "learning_rate": 5.6929241476086006e-06, "epoch": 2.1764405340928747, "total_flos": 2077032047599226880, "step": 541000 }, { "loss": 3.115, "learning_rate": 5.69211302221267e-06, "epoch": 2.1768428336370693, "total_flos": 2077408635919011840, "step": 541100 }, { "loss": 3.065, "learning_rate": 5.691301896816739e-06, "epoch": 2.1772451331812643, "total_flos": 2077799447745515520, "step": 541200 }, { "loss": 3.12, "learning_rate": 5.690490771420808e-06, "epoch": 2.177647432725459, "total_flos": 2078183131884933120, "step": 541300 }, { "loss": 3.105, "learning_rate": 5.689679646024878e-06, "epoch": 2.1780497322696535, "total_flos": 2078559916720680960, "step": 541400 }, { "loss": 3.0825, "learning_rate": 5.688868520628947e-06, "epoch": 2.178452031813848, "total_flos": 2078946522043330560, "step": 541500 }, { "loss": 3.105, "learning_rate": 5.688057395233017e-06, "epoch": 2.1788543313580426, "total_flos": 2079313932536524800, "step": 541600 }, { "loss": 3.1075, "learning_rate": 5.687246269837086e-06, "epoch": 2.179256630902237, "total_flos": 2079699109135011840, "step": 541700 }, { "loss": 3.0625, "learning_rate": 5.686435144441156e-06, "epoch": 2.1796589304464318, "total_flos": 2080086351806730240, "step": 541800 }, { "loss": 3.1475, "learning_rate": 5.685624019045225e-06, "epoch": 2.1800612299906263, "total_flos": 2080457926313840640, "step": 541900 }, { "loss": 3.11, "learning_rate": 5.684812893649294e-06, "epoch": 2.180463529534821, "total_flos": 2080853661661900800, "step": 542000 }, { "loss": 3.0775, "learning_rate": 5.684001768253363e-06, "epoch": 2.1808658290790155, "total_flos": 2081237999084113920, "step": 542100 }, { "loss": 3.0725, "learning_rate": 5.6831906428574335e-06, "epoch": 2.18126812862321, "total_flos": 2081624769055272960, "step": 542200 }, { "loss": 3.0825, "learning_rate": 5.682379517461503e-06, "epoch": 2.181670428167405, "total_flos": 2082005558567669760, "step": 542300 }, { "loss": 3.1175, "learning_rate": 5.681568392065572e-06, "epoch": 2.1820727277115997, "total_flos": 2082397039610695680, "step": 542400 }, { "loss": 3.0675, "learning_rate": 5.680757266669641e-06, "epoch": 2.1824750272557942, "total_flos": 2082784037965271040, "step": 542500 }, { "loss": 3.0625, "learning_rate": 5.679946141273711e-06, "epoch": 2.182877326799989, "total_flos": 2083172539401400320, "step": 542600 }, { "loss": 3.155, "learning_rate": 5.67913501587778e-06, "epoch": 2.1832796263441834, "total_flos": 2083546137491804160, "step": 542700 }, { "loss": 3.1175, "learning_rate": 5.6783238904818495e-06, "epoch": 2.183681925888378, "total_flos": 2083933921910231040, "step": 542800 }, { "loss": 3.135, "learning_rate": 5.677512765085919e-06, "epoch": 2.1840842254325725, "total_flos": 2084326348354375680, "step": 542900 }, { "loss": 3.145, "learning_rate": 5.676701639689989e-06, "epoch": 2.184486524976767, "total_flos": 2084716772460195840, "step": 543000 }, { "loss": 3.1275, "learning_rate": 5.675890514294058e-06, "epoch": 2.1848888245209617, "total_flos": 2085102352713093120, "step": 543100 }, { "loss": 3.0775, "learning_rate": 5.675079388898127e-06, "epoch": 2.1852911240651562, "total_flos": 2085472169199022080, "step": 543200 }, { "loss": 3.1275, "learning_rate": 5.674268263502196e-06, "epoch": 2.185693423609351, "total_flos": 2085866412088012800, "step": 543300 }, { "loss": 3.06, "learning_rate": 5.673457138106266e-06, "epoch": 2.186095723153546, "total_flos": 2086273922460119040, "step": 543400 }, { "loss": 3.0475, "learning_rate": 5.6726460127103355e-06, "epoch": 2.1864980226977404, "total_flos": 2086666672890040320, "step": 543500 }, { "loss": 3.1125, "learning_rate": 5.671834887314405e-06, "epoch": 2.186900322241935, "total_flos": 2087046166459330560, "step": 543600 }, { "loss": 3.1, "learning_rate": 5.671023761918474e-06, "epoch": 2.1873026217861296, "total_flos": 2087429197315952640, "step": 543700 }, { "loss": 3.1, "learning_rate": 5.670212636522544e-06, "epoch": 2.187704921330324, "total_flos": 2087821012967239680, "step": 543800 }, { "loss": 3.09, "learning_rate": 5.669401511126613e-06, "epoch": 2.1881072208745187, "total_flos": 2088204590881812480, "step": 543900 }, { "loss": 3.09, "learning_rate": 5.668590385730682e-06, "epoch": 2.1885095204187133, "total_flos": 2088585863717253120, "step": 544000 }, { "loss": 3.1275, "learning_rate": 5.6677792603347516e-06, "epoch": 2.188911819962908, "total_flos": 2088970153338286080, "step": 544100 }, { "loss": 3.0875, "learning_rate": 5.666968134938822e-06, "epoch": 2.1893141195071024, "total_flos": 2089350581686210560, "step": 544200 }, { "loss": 3.0925, "learning_rate": 5.666157009542891e-06, "epoch": 2.1897164190512974, "total_flos": 2089735976045629440, "step": 544300 }, { "loss": 3.0875, "learning_rate": 5.66534588414696e-06, "epoch": 2.190118718595492, "total_flos": 2090104087622799360, "step": 544400 }, { "loss": 3.0975, "learning_rate": 5.664534758751029e-06, "epoch": 2.1905210181396866, "total_flos": 2090486799804887040, "step": 544500 }, { "loss": 3.155, "learning_rate": 5.663723633355099e-06, "epoch": 2.190923317683881, "total_flos": 2090865523244052480, "step": 544600 }, { "loss": 3.11, "learning_rate": 5.6629125079591684e-06, "epoch": 2.1913256172280757, "total_flos": 2091238568965263360, "step": 544700 }, { "loss": 3.095, "learning_rate": 5.662101382563238e-06, "epoch": 2.1917279167722703, "total_flos": 2091608661635788800, "step": 544800 }, { "loss": 3.0975, "learning_rate": 5.661290257167307e-06, "epoch": 2.192130216316465, "total_flos": 2091983545046814720, "step": 544900 }, { "loss": 3.1, "learning_rate": 5.660479131771377e-06, "epoch": 2.1925325158606594, "total_flos": 2092373692968038400, "step": 545000 }, { "loss": 3.1075, "learning_rate": 5.659668006375446e-06, "epoch": 2.192934815404854, "total_flos": 2092746510305832960, "step": 545100 }, { "loss": 3.1075, "learning_rate": 5.658856880979515e-06, "epoch": 2.1933371149490486, "total_flos": 2093137895746498560, "step": 545200 }, { "loss": 3.075, "learning_rate": 5.6580457555835845e-06, "epoch": 2.193739414493243, "total_flos": 2093522519975792640, "step": 545300 }, { "loss": 3.095, "learning_rate": 5.6572346301876545e-06, "epoch": 2.194141714037438, "total_flos": 2093902534046822400, "step": 545400 }, { "loss": 3.0725, "learning_rate": 5.656423504791724e-06, "epoch": 2.1945440135816328, "total_flos": 2094289845764689920, "step": 545500 }, { "loss": 3.09, "learning_rate": 5.655612379395793e-06, "epoch": 2.1949463131258273, "total_flos": 2094669950126837760, "step": 545600 }, { "loss": 3.04, "learning_rate": 5.654801253999862e-06, "epoch": 2.195348612670022, "total_flos": 2095071320702914560, "step": 545700 }, { "loss": 3.08, "learning_rate": 5.653990128603932e-06, "epoch": 2.1957509122142165, "total_flos": 2095455695303823360, "step": 545800 }, { "loss": 3.1025, "learning_rate": 5.653179003208001e-06, "epoch": 2.196153211758411, "total_flos": 2095844552593182720, "step": 545900 }, { "loss": 3.12, "learning_rate": 5.6523678778120705e-06, "epoch": 2.1965555113026056, "total_flos": 2096219393514270720, "step": 546000 }, { "loss": 3.1075, "learning_rate": 5.6515567524161406e-06, "epoch": 2.1969578108468, "total_flos": 2096597240598466560, "step": 546100 }, { "loss": 3.075, "learning_rate": 5.65074562702021e-06, "epoch": 2.1973601103909948, "total_flos": 2096987919643914240, "step": 546200 }, { "loss": 3.105, "learning_rate": 5.649934501624279e-06, "epoch": 2.1977624099351893, "total_flos": 2097381488005140480, "step": 546300 }, { "loss": 3.11, "learning_rate": 5.649123376228348e-06, "epoch": 2.1981647094793844, "total_flos": 2097767689673379840, "step": 546400 }, { "loss": 3.1025, "learning_rate": 5.648312250832418e-06, "epoch": 2.198567009023579, "total_flos": 2098149668904038400, "step": 546500 }, { "loss": 3.1125, "learning_rate": 5.647501125436487e-06, "epoch": 2.1989693085677735, "total_flos": 2098516851013816320, "step": 546600 }, { "loss": 3.0775, "learning_rate": 5.646690000040557e-06, "epoch": 2.199371608111968, "total_flos": 2098901236237209600, "step": 546700 }, { "loss": 3.16, "learning_rate": 5.645878874644626e-06, "epoch": 2.1997739076561627, "total_flos": 2099293211225763840, "step": 546800 }, { "loss": 3.12, "learning_rate": 5.645067749248696e-06, "epoch": 2.2001762072003572, "total_flos": 2099678515294064640, "step": 546900 }, { "loss": 3.0925, "learning_rate": 5.644256623852765e-06, "epoch": 2.200578506744552, "total_flos": 2100060961914040320, "step": 547000 }, { "loss": 3.125, "learning_rate": 5.643445498456834e-06, "epoch": 2.2009808062887464, "total_flos": 2100446271293583360, "step": 547100 }, { "loss": 3.085, "learning_rate": 5.642634373060903e-06, "epoch": 2.201383105832941, "total_flos": 2100823284512747520, "step": 547200 }, { "loss": 3.0325, "learning_rate": 5.6418232476649735e-06, "epoch": 2.2017854053771355, "total_flos": 2101219434137702400, "step": 547300 }, { "loss": 3.11, "learning_rate": 5.641012122269043e-06, "epoch": 2.2021877049213305, "total_flos": 2101593239366553600, "step": 547400 }, { "loss": 3.1175, "learning_rate": 5.640200996873112e-06, "epoch": 2.202590004465525, "total_flos": 2101981469929328640, "step": 547500 }, { "loss": 3.0725, "learning_rate": 5.639389871477181e-06, "epoch": 2.2029923040097197, "total_flos": 2102353703030476800, "step": 547600 }, { "loss": 3.115, "learning_rate": 5.638578746081251e-06, "epoch": 2.2033946035539143, "total_flos": 2102738210412441600, "step": 547700 }, { "loss": 3.045, "learning_rate": 5.63776762068532e-06, "epoch": 2.203796903098109, "total_flos": 2103115945960550400, "step": 547800 }, { "loss": 3.085, "learning_rate": 5.6369564952893895e-06, "epoch": 2.2041992026423034, "total_flos": 2103511378567802880, "step": 547900 }, { "loss": 3.1, "learning_rate": 5.636145369893459e-06, "epoch": 2.204601502186498, "total_flos": 2103903215464058880, "step": 548000 }, { "loss": 3.085, "learning_rate": 5.635334244497529e-06, "epoch": 2.2050038017306925, "total_flos": 2104283898751610880, "step": 548100 }, { "loss": 3.08, "learning_rate": 5.634523119101598e-06, "epoch": 2.205406101274887, "total_flos": 2104675411662090240, "step": 548200 }, { "loss": 3.12, "learning_rate": 5.633711993705667e-06, "epoch": 2.2058084008190817, "total_flos": 2105052185875353600, "step": 548300 }, { "loss": 3.1025, "learning_rate": 5.632900868309736e-06, "epoch": 2.2062107003632763, "total_flos": 2105435636320112640, "step": 548400 }, { "loss": 3.13, "learning_rate": 5.632089742913806e-06, "epoch": 2.2066129999074713, "total_flos": 2105810360393871360, "step": 548500 }, { "loss": 3.085, "learning_rate": 5.6312786175178755e-06, "epoch": 2.207015299451666, "total_flos": 2106185668704276480, "step": 548600 }, { "loss": 3.0525, "learning_rate": 5.630467492121945e-06, "epoch": 2.2074175989958604, "total_flos": 2106564285918597120, "step": 548700 }, { "loss": 3.1, "learning_rate": 5.629656366726014e-06, "epoch": 2.207819898540055, "total_flos": 2106951071823482880, "step": 548800 }, { "loss": 3.11, "learning_rate": 5.628845241330084e-06, "epoch": 2.2082221980842496, "total_flos": 2107347147091046400, "step": 548900 }, { "loss": 3.1125, "learning_rate": 5.628034115934153e-06, "epoch": 2.208624497628444, "total_flos": 2107717330052689920, "step": 549000 }, { "loss": 3.1, "learning_rate": 5.627222990538222e-06, "epoch": 2.2090267971726387, "total_flos": 2108086939400171520, "step": 549100 }, { "loss": 3.095, "learning_rate": 5.6264118651422916e-06, "epoch": 2.2094290967168333, "total_flos": 2108466236453498880, "step": 549200 }, { "loss": 3.0625, "learning_rate": 5.625600739746362e-06, "epoch": 2.209831396261028, "total_flos": 2108871638262435840, "step": 549300 }, { "loss": 3.0775, "learning_rate": 5.624789614350431e-06, "epoch": 2.210233695805223, "total_flos": 2109252688025702400, "step": 549400 }, { "loss": 3.09, "learning_rate": 5.6239784889545e-06, "epoch": 2.2106359953494175, "total_flos": 2109644004420218880, "step": 549500 }, { "loss": 3.1375, "learning_rate": 5.623167363558569e-06, "epoch": 2.211038294893612, "total_flos": 2110035395172126720, "step": 549600 }, { "loss": 3.0575, "learning_rate": 5.622356238162639e-06, "epoch": 2.2114405944378066, "total_flos": 2110407368022405120, "step": 549700 }, { "loss": 3.115, "learning_rate": 5.6215451127667084e-06, "epoch": 2.211842893982001, "total_flos": 2110783850117345280, "step": 549800 }, { "loss": 3.045, "learning_rate": 5.620733987370778e-06, "epoch": 2.2122451935261958, "total_flos": 2111158521078681600, "step": 549900 }, { "loss": 3.0825, "learning_rate": 5.619922861974847e-06, "epoch": 2.2126474930703903, "total_flos": 2111538508593500160, "step": 550000 }, { "loss": 3.0975, "learning_rate": 5.619111736578917e-06, "epoch": 2.213049792614585, "total_flos": 2111917832203038720, "step": 550100 }, { "loss": 3.0525, "learning_rate": 5.618300611182986e-06, "epoch": 2.2134520921587795, "total_flos": 2112285263941201920, "step": 550200 }, { "loss": 3.1025, "learning_rate": 5.617489485787055e-06, "epoch": 2.213854391702974, "total_flos": 2112665782580244480, "step": 550300 }, { "loss": 3.11, "learning_rate": 5.6166783603911245e-06, "epoch": 2.2142566912471686, "total_flos": 2113040235780648960, "step": 550400 }, { "loss": 3.0925, "learning_rate": 5.6158672349951945e-06, "epoch": 2.2146589907913636, "total_flos": 2113439280032624640, "step": 550500 }, { "loss": 3.1025, "learning_rate": 5.615056109599264e-06, "epoch": 2.215061290335558, "total_flos": 2113816027689676800, "step": 550600 }, { "loss": 3.0775, "learning_rate": 5.614244984203333e-06, "epoch": 2.215463589879753, "total_flos": 2114202542721208320, "step": 550700 }, { "loss": 3.0325, "learning_rate": 5.613433858807403e-06, "epoch": 2.2158658894239474, "total_flos": 2114587066036899840, "step": 550800 }, { "loss": 3.1575, "learning_rate": 5.612622733411472e-06, "epoch": 2.216268188968142, "total_flos": 2114959389429166080, "step": 550900 }, { "loss": 3.105, "learning_rate": 5.611811608015541e-06, "epoch": 2.2166704885123365, "total_flos": 2115319512898007040, "step": 551000 }, { "loss": 3.07, "learning_rate": 5.6110004826196105e-06, "epoch": 2.217072788056531, "total_flos": 2115692415215677440, "step": 551100 }, { "loss": 3.13, "learning_rate": 5.6101893572236806e-06, "epoch": 2.2174750876007256, "total_flos": 2116072843563601920, "step": 551200 }, { "loss": 3.1375, "learning_rate": 5.60937823182775e-06, "epoch": 2.21787738714492, "total_flos": 2116447477346242560, "step": 551300 }, { "loss": 3.095, "learning_rate": 5.608567106431819e-06, "epoch": 2.218279686689115, "total_flos": 2116809985562849280, "step": 551400 }, { "loss": 3.0375, "learning_rate": 5.607755981035888e-06, "epoch": 2.2186819862333094, "total_flos": 2117194147714068480, "step": 551500 }, { "loss": 3.1275, "learning_rate": 5.606944855639958e-06, "epoch": 2.2190842857775044, "total_flos": 2117576296904478720, "step": 551600 }, { "loss": 3.0825, "learning_rate": 5.606133730244027e-06, "epoch": 2.219486585321699, "total_flos": 2117969296962785280, "step": 551700 }, { "loss": 3.0875, "learning_rate": 5.605322604848097e-06, "epoch": 2.2198888848658935, "total_flos": 2118354277045309440, "step": 551800 }, { "loss": 3.0875, "learning_rate": 5.604511479452166e-06, "epoch": 2.220291184410088, "total_flos": 2118724390960803840, "step": 551900 }, { "loss": 3.12, "learning_rate": 5.603700354056236e-06, "epoch": 2.2206934839542827, "total_flos": 2119099906409656320, "step": 552000 }, { "loss": 3.1175, "learning_rate": 5.602889228660305e-06, "epoch": 2.2210957834984772, "total_flos": 2119486878208020480, "step": 552100 }, { "loss": 3.1075, "learning_rate": 5.602078103264374e-06, "epoch": 2.221498083042672, "total_flos": 2119874243038310400, "step": 552200 }, { "loss": 3.16, "learning_rate": 5.601266977868443e-06, "epoch": 2.2219003825868664, "total_flos": 2120270180213575680, "step": 552300 }, { "loss": 3.1175, "learning_rate": 5.600455852472514e-06, "epoch": 2.222302682131061, "total_flos": 2120656052584796160, "step": 552400 }, { "loss": 3.06, "learning_rate": 5.599644727076583e-06, "epoch": 2.222704981675256, "total_flos": 2121039508340797440, "step": 552500 }, { "loss": 3.0975, "learning_rate": 5.598833601680652e-06, "epoch": 2.2231072812194506, "total_flos": 2121421976205742080, "step": 552600 }, { "loss": 3.08, "learning_rate": 5.598022476284721e-06, "epoch": 2.223509580763645, "total_flos": 2121799802044968960, "step": 552700 }, { "loss": 3.1075, "learning_rate": 5.597211350888792e-06, "epoch": 2.2239118803078397, "total_flos": 2122186773843333120, "step": 552800 }, { "loss": 3.125, "learning_rate": 5.59640022549286e-06, "epoch": 2.2243141798520343, "total_flos": 2122553541676216320, "step": 552900 }, { "loss": 3.1075, "learning_rate": 5.5955891000969295e-06, "epoch": 2.224716479396229, "total_flos": 2122934995093893120, "step": 553000 }, { "loss": 3.11, "learning_rate": 5.594777974700999e-06, "epoch": 2.2251187789404234, "total_flos": 2123307281307463680, "step": 553100 }, { "loss": 3.09, "learning_rate": 5.5939668493050696e-06, "epoch": 2.225521078484618, "total_flos": 2123682249698365440, "step": 553200 }, { "loss": 3.1075, "learning_rate": 5.593155723909138e-06, "epoch": 2.2259233780288126, "total_flos": 2124069603906170880, "step": 553300 }, { "loss": 3.0575, "learning_rate": 5.592344598513207e-06, "epoch": 2.226325677573007, "total_flos": 2124448784112168960, "step": 553400 }, { "loss": 3.13, "learning_rate": 5.591533473117276e-06, "epoch": 2.2267279771172017, "total_flos": 2124826280654376960, "step": 553500 }, { "loss": 3.09, "learning_rate": 5.590722347721347e-06, "epoch": 2.2271302766613967, "total_flos": 2125212705394790400, "step": 553600 }, { "loss": 3.0875, "learning_rate": 5.5899112223254155e-06, "epoch": 2.2275325762055913, "total_flos": 2125582251007365120, "step": 553700 }, { "loss": 3.12, "learning_rate": 5.589100096929485e-06, "epoch": 2.227934875749786, "total_flos": 2125966216642621440, "step": 553800 }, { "loss": 3.055, "learning_rate": 5.588288971533554e-06, "epoch": 2.2283371752939805, "total_flos": 2126364326115962880, "step": 553900 }, { "loss": 3.0875, "learning_rate": 5.587477846137625e-06, "epoch": 2.228739474838175, "total_flos": 2126748137725194240, "step": 554000 }, { "loss": 3.04, "learning_rate": 5.586666720741693e-06, "epoch": 2.2291417743823696, "total_flos": 2127128343000944640, "step": 554100 }, { "loss": 3.065, "learning_rate": 5.585855595345762e-06, "epoch": 2.229544073926564, "total_flos": 2127495031165194240, "step": 554200 }, { "loss": 3.1025, "learning_rate": 5.5850444699498316e-06, "epoch": 2.2299463734707587, "total_flos": 2127877732724797440, "step": 554300 }, { "loss": 3.12, "learning_rate": 5.5842333445539025e-06, "epoch": 2.2303486730149533, "total_flos": 2128275316385157120, "step": 554400 }, { "loss": 3.1075, "learning_rate": 5.583422219157971e-06, "epoch": 2.230750972559148, "total_flos": 2128655712865628160, "step": 554500 }, { "loss": 3.08, "learning_rate": 5.58261109376204e-06, "epoch": 2.231153272103343, "total_flos": 2129049111267102720, "step": 554600 }, { "loss": 3.15, "learning_rate": 5.581799968366109e-06, "epoch": 2.2315555716475375, "total_flos": 2129431701290618880, "step": 554700 }, { "loss": 3.1175, "learning_rate": 5.58098884297018e-06, "epoch": 2.231957871191732, "total_flos": 2129816155560161280, "step": 554800 }, { "loss": 3.0775, "learning_rate": 5.580177717574249e-06, "epoch": 2.2323601707359266, "total_flos": 2130175312382914560, "step": 554900 }, { "loss": 3.1225, "learning_rate": 5.579366592178318e-06, "epoch": 2.232762470280121, "total_flos": 2130548751136051200, "step": 555000 }, { "loss": 3.0825, "learning_rate": 5.578555466782387e-06, "epoch": 2.2331647698243158, "total_flos": 2130930693188014080, "step": 555100 }, { "loss": 3.08, "learning_rate": 5.577744341386458e-06, "epoch": 2.2335670693685103, "total_flos": 2131311498634137600, "step": 555200 }, { "loss": 3.1575, "learning_rate": 5.576933215990527e-06, "epoch": 2.233969368912705, "total_flos": 2131708110337167360, "step": 555300 }, { "loss": 3.0675, "learning_rate": 5.576122090594595e-06, "epoch": 2.2343716684568995, "total_flos": 2132083662964715520, "step": 555400 }, { "loss": 3.1175, "learning_rate": 5.575310965198666e-06, "epoch": 2.234773968001094, "total_flos": 2132468797073264640, "step": 555500 }, { "loss": 3.0825, "learning_rate": 5.574499839802735e-06, "epoch": 2.235176267545289, "total_flos": 2132869222248222720, "step": 555600 }, { "loss": 3.08, "learning_rate": 5.5736887144068045e-06, "epoch": 2.2355785670894837, "total_flos": 2133266025155973120, "step": 555700 }, { "loss": 3.135, "learning_rate": 5.572877589010873e-06, "epoch": 2.2359808666336782, "total_flos": 2133650405068124160, "step": 555800 }, { "loss": 3.1175, "learning_rate": 5.572066463614944e-06, "epoch": 2.236383166177873, "total_flos": 2134017443774361600, "step": 555900 }, { "loss": 3.0675, "learning_rate": 5.571255338219013e-06, "epoch": 2.2367854657220674, "total_flos": 2134393495658680320, "step": 556000 }, { "loss": 3.1025, "learning_rate": 5.570444212823082e-06, "epoch": 2.237187765266262, "total_flos": 2134782294524375040, "step": 556100 }, { "loss": 3.0975, "learning_rate": 5.5696330874271505e-06, "epoch": 2.2375900648104565, "total_flos": 2135162945944473600, "step": 556200 }, { "loss": 3.0725, "learning_rate": 5.568821962031221e-06, "epoch": 2.237992364354651, "total_flos": 2135555505169674240, "step": 556300 }, { "loss": 3.125, "learning_rate": 5.568010836635291e-06, "epoch": 2.2383946638988457, "total_flos": 2135932003198341120, "step": 556400 }, { "loss": 3.045, "learning_rate": 5.56719971123936e-06, "epoch": 2.2387969634430402, "total_flos": 2136303710486507520, "step": 556500 }, { "loss": 3.0975, "learning_rate": 5.566388585843428e-06, "epoch": 2.239199262987235, "total_flos": 2136675444330885120, "step": 556600 }, { "loss": 3.1025, "learning_rate": 5.565577460447499e-06, "epoch": 2.23960156253143, "total_flos": 2137051421857812480, "step": 556700 }, { "loss": 3.075, "learning_rate": 5.564766335051568e-06, "epoch": 2.2400038620756244, "total_flos": 2137443726143385600, "step": 556800 }, { "loss": 3.1425, "learning_rate": 5.5639552096556374e-06, "epoch": 2.240406161619819, "total_flos": 2137829332952494080, "step": 556900 }, { "loss": 3.075, "learning_rate": 5.563144084259706e-06, "epoch": 2.2408084611640136, "total_flos": 2138206123099484160, "step": 557000 }, { "loss": 3.1225, "learning_rate": 5.562332958863777e-06, "epoch": 2.241210760708208, "total_flos": 2138583879892561920, "step": 557100 }, { "loss": 3.1025, "learning_rate": 5.561521833467846e-06, "epoch": 2.2416130602524027, "total_flos": 2138972089210368000, "step": 557200 }, { "loss": 3.0725, "learning_rate": 5.560710708071915e-06, "epoch": 2.2420153597965973, "total_flos": 2139363352492462080, "step": 557300 }, { "loss": 3.14, "learning_rate": 5.559899582675984e-06, "epoch": 2.242417659340792, "total_flos": 2139750026861260800, "step": 557400 }, { "loss": 3.085, "learning_rate": 5.559088457280054e-06, "epoch": 2.2428199588849864, "total_flos": 2140129446073159680, "step": 557500 }, { "loss": 3.1125, "learning_rate": 5.5582773318841235e-06, "epoch": 2.243222258429181, "total_flos": 2140506183107727360, "step": 557600 }, { "loss": 3.0425, "learning_rate": 5.557466206488193e-06, "epoch": 2.243624557973376, "total_flos": 2140881464861921280, "step": 557700 }, { "loss": 3.115, "learning_rate": 5.556655081092262e-06, "epoch": 2.2440268575175706, "total_flos": 2141266726440284160, "step": 557800 }, { "loss": 3.115, "learning_rate": 5.555843955696332e-06, "epoch": 2.244429157061765, "total_flos": 2141649534224732160, "step": 557900 }, { "loss": 3.135, "learning_rate": 5.555032830300401e-06, "epoch": 2.2448314566059597, "total_flos": 2142046512403476480, "step": 558000 }, { "loss": 3.04, "learning_rate": 5.55422170490447e-06, "epoch": 2.2452337561501543, "total_flos": 2142411495658967040, "step": 558100 }, { "loss": 3.04, "learning_rate": 5.5534105795085395e-06, "epoch": 2.245636055694349, "total_flos": 2142796592588820480, "step": 558200 }, { "loss": 3.1, "learning_rate": 5.5525994541126096e-06, "epoch": 2.2460383552385434, "total_flos": 2143200130151731200, "step": 558300 }, { "loss": 3.15, "learning_rate": 5.551788328716679e-06, "epoch": 2.246440654782738, "total_flos": 2143594468643082240, "step": 558400 }, { "loss": 3.0875, "learning_rate": 5.550977203320748e-06, "epoch": 2.2468429543269326, "total_flos": 2143966946061373440, "step": 558500 }, { "loss": 3.06, "learning_rate": 5.550166077924817e-06, "epoch": 2.247245253871127, "total_flos": 2144350513353461760, "step": 558600 }, { "loss": 3.0575, "learning_rate": 5.549354952528887e-06, "epoch": 2.247647553415322, "total_flos": 2144722215330385920, "step": 558700 }, { "loss": 3.0425, "learning_rate": 5.548543827132956e-06, "epoch": 2.2480498529595168, "total_flos": 2145108576335892480, "step": 558800 }, { "loss": 3.115, "learning_rate": 5.547732701737026e-06, "epoch": 2.2484521525037113, "total_flos": 2145488340778536960, "step": 558900 }, { "loss": 3.0625, "learning_rate": 5.546921576341095e-06, "epoch": 2.248854452047906, "total_flos": 2145862655886643200, "step": 559000 }, { "loss": 3.0175, "learning_rate": 5.546110450945165e-06, "epoch": 2.2492567515921005, "total_flos": 2146227777234432000, "step": 559100 }, { "loss": 3.0925, "learning_rate": 5.545299325549234e-06, "epoch": 2.249659051136295, "total_flos": 2146619704421806080, "step": 559200 }, { "loss": 3.08, "learning_rate": 5.544488200153303e-06, "epoch": 2.2500613506804896, "total_flos": 2146990402573946880, "step": 559300 }, { "loss": 3.045, "learning_rate": 5.543677074757372e-06, "epoch": 2.250463650224684, "total_flos": 2147369046344478720, "step": 559400 }, { "loss": 3.05, "learning_rate": 5.5428659493614425e-06, "epoch": 2.2508659497688788, "total_flos": 2147755667600855040, "step": 559500 }, { "loss": 3.07, "learning_rate": 5.542054823965512e-06, "epoch": 2.2512682493130733, "total_flos": 2148139670414807040, "step": 559600 }, { "loss": 3.0675, "learning_rate": 5.541243698569581e-06, "epoch": 2.251670548857268, "total_flos": 2148513645603409920, "step": 559700 }, { "loss": 3.0825, "learning_rate": 5.54043257317365e-06, "epoch": 2.252072848401463, "total_flos": 2148882097100083200, "step": 559800 }, { "loss": 3.09, "learning_rate": 5.53962144777772e-06, "epoch": 2.2524751479456575, "total_flos": 2149251281548185600, "step": 559900 }, { "loss": 3.0925, "learning_rate": 5.538810322381789e-06, "epoch": 2.252877447489852, "total_flos": 2149630950388469760, "step": 560000 }, { "loss": 3.07, "learning_rate": 5.5379991969858585e-06, "epoch": 2.2532797470340467, "total_flos": 2150012664057016320, "step": 560100 }, { "loss": 3.07, "learning_rate": 5.5371880715899285e-06, "epoch": 2.2536820465782412, "total_flos": 2150395795827240960, "step": 560200 }, { "loss": 3.0725, "learning_rate": 5.536376946193998e-06, "epoch": 2.254084346122436, "total_flos": 2150797894043504640, "step": 560300 }, { "loss": 3.0825, "learning_rate": 5.535565820798067e-06, "epoch": 2.2544866456666304, "total_flos": 2151204729887846400, "step": 560400 }, { "loss": 3.07, "learning_rate": 5.534754695402136e-06, "epoch": 2.254888945210825, "total_flos": 2151586576337448960, "step": 560500 }, { "loss": 3.1175, "learning_rate": 5.533943570006206e-06, "epoch": 2.2552912447550195, "total_flos": 2151956530915676160, "step": 560600 }, { "loss": 3.07, "learning_rate": 5.533132444610275e-06, "epoch": 2.2556935442992145, "total_flos": 2152337299183104000, "step": 560700 }, { "loss": 3.09, "learning_rate": 5.5323213192143445e-06, "epoch": 2.256095843843409, "total_flos": 2152712490646179840, "step": 560800 }, { "loss": 3.08, "learning_rate": 5.531510193818414e-06, "epoch": 2.2564981433876037, "total_flos": 2153113877155983360, "step": 560900 }, { "loss": 3.1025, "learning_rate": 5.530699068422484e-06, "epoch": 2.2569004429317983, "total_flos": 2153497651586519040, "step": 561000 }, { "loss": 3.115, "learning_rate": 5.529887943026553e-06, "epoch": 2.257302742475993, "total_flos": 2153871031915991040, "step": 561100 }, { "loss": 3.1125, "learning_rate": 5.529076817630622e-06, "epoch": 2.2577050420201874, "total_flos": 2154248825887764480, "step": 561200 }, { "loss": 3.0425, "learning_rate": 5.528265692234691e-06, "epoch": 2.258107341564382, "total_flos": 2154625307982704640, "step": 561300 }, { "loss": 3.075, "learning_rate": 5.527454566838761e-06, "epoch": 2.2585096411085765, "total_flos": 2155007972363612160, "step": 561400 }, { "loss": 3.0575, "learning_rate": 5.526643441442831e-06, "epoch": 2.258911940652771, "total_flos": 2155397870656450560, "step": 561500 }, { "loss": 3.0475, "learning_rate": 5.5258323160469e-06, "epoch": 2.2593142401969657, "total_flos": 2155764765959147520, "step": 561600 }, { "loss": 3.0775, "learning_rate": 5.525021190650969e-06, "epoch": 2.2597165397411603, "total_flos": 2156146049417072640, "step": 561700 }, { "loss": 3.13, "learning_rate": 5.524210065255039e-06, "epoch": 2.2601188392853553, "total_flos": 2156526228136611840, "step": 561800 }, { "loss": 3.1025, "learning_rate": 5.523398939859108e-06, "epoch": 2.26052113882955, "total_flos": 2156909885719818240, "step": 561900 }, { "loss": 3.095, "learning_rate": 5.5225878144631774e-06, "epoch": 2.2609234383737444, "total_flos": 2157304303879802880, "step": 562000 }, { "loss": 3.06, "learning_rate": 5.521776689067247e-06, "epoch": 2.261325737917939, "total_flos": 2157693979100467200, "step": 562100 }, { "loss": 3.1075, "learning_rate": 5.520965563671317e-06, "epoch": 2.2617280374621336, "total_flos": 2158071709337333760, "step": 562200 }, { "loss": 3.105, "learning_rate": 5.520154438275386e-06, "epoch": 2.262130337006328, "total_flos": 2158463918020546560, "step": 562300 }, { "loss": 3.065, "learning_rate": 5.519343312879455e-06, "epoch": 2.2625326365505227, "total_flos": 2158845158988533760, "step": 562400 }, { "loss": 3.0375, "learning_rate": 5.518532187483524e-06, "epoch": 2.2629349360947173, "total_flos": 2159217397400924160, "step": 562500 }, { "loss": 3.085, "learning_rate": 5.517721062087594e-06, "epoch": 2.263337235638912, "total_flos": 2159603397241958400, "step": 562600 }, { "loss": 3.115, "learning_rate": 5.5169099366916635e-06, "epoch": 2.2637395351831064, "total_flos": 2159972980033228800, "step": 562700 }, { "loss": 3.1225, "learning_rate": 5.516098811295733e-06, "epoch": 2.264141834727301, "total_flos": 2160357290899230720, "step": 562800 }, { "loss": 3.095, "learning_rate": 5.515287685899802e-06, "epoch": 2.264544134271496, "total_flos": 2160734914911252480, "step": 562900 }, { "loss": 3.11, "learning_rate": 5.514476560503872e-06, "epoch": 2.2649464338156906, "total_flos": 2161121127201976320, "step": 563000 }, { "loss": 3.11, "learning_rate": 5.513665435107941e-06, "epoch": 2.265348733359885, "total_flos": 2161513596136058880, "step": 563100 }, { "loss": 3.05, "learning_rate": 5.51285430971201e-06, "epoch": 2.2657510329040798, "total_flos": 2161897869823365120, "step": 563200 }, { "loss": 3.125, "learning_rate": 5.5120431843160795e-06, "epoch": 2.2661533324482743, "total_flos": 2162289600494776320, "step": 563300 }, { "loss": 3.09, "learning_rate": 5.5112320589201496e-06, "epoch": 2.266555631992469, "total_flos": 2162666709316300800, "step": 563400 }, { "loss": 3.0425, "learning_rate": 5.510420933524219e-06, "epoch": 2.2669579315366635, "total_flos": 2163068887201198080, "step": 563500 }, { "loss": 3.085, "learning_rate": 5.509609808128288e-06, "epoch": 2.267360231080858, "total_flos": 2163447674375270400, "step": 563600 }, { "loss": 3.1225, "learning_rate": 5.508798682732357e-06, "epoch": 2.2677625306250526, "total_flos": 2163834539948789760, "step": 563700 }, { "loss": 3.065, "learning_rate": 5.507987557336427e-06, "epoch": 2.2681648301692476, "total_flos": 2164227805569208320, "step": 563800 }, { "loss": 3.135, "learning_rate": 5.507176431940496e-06, "epoch": 2.268567129713442, "total_flos": 2164623461248634880, "step": 563900 }, { "loss": 3.0725, "learning_rate": 5.506365306544566e-06, "epoch": 2.268969429257637, "total_flos": 2165003400962273280, "step": 564000 }, { "loss": 3.045, "learning_rate": 5.505554181148635e-06, "epoch": 2.2693717288018314, "total_flos": 2165386654891069440, "step": 564100 }, { "loss": 3.0925, "learning_rate": 5.504743055752705e-06, "epoch": 2.269774028346026, "total_flos": 2165759169488056320, "step": 564200 }, { "loss": 3.0675, "learning_rate": 5.503931930356774e-06, "epoch": 2.2701763278902205, "total_flos": 2166143597201387520, "step": 564300 }, { "loss": 3.09, "learning_rate": 5.503120804960843e-06, "epoch": 2.270578627434415, "total_flos": 2166520121786265600, "step": 564400 }, { "loss": 3.1225, "learning_rate": 5.502309679564912e-06, "epoch": 2.2709809269786096, "total_flos": 2166902425002700800, "step": 564500 }, { "loss": 3.115, "learning_rate": 5.5014985541689825e-06, "epoch": 2.271383226522804, "total_flos": 2167284308630999040, "step": 564600 }, { "loss": 3.0575, "learning_rate": 5.500687428773052e-06, "epoch": 2.271785526066999, "total_flos": 2167676464201789440, "step": 564700 }, { "loss": 3.1, "learning_rate": 5.499876303377121e-06, "epoch": 2.2721878256111934, "total_flos": 2168051050183249920, "step": 564800 }, { "loss": 3.1075, "learning_rate": 5.499065177981191e-06, "epoch": 2.2725901251553884, "total_flos": 2168443104840437760, "step": 564900 }, { "loss": 3.1375, "learning_rate": 5.49825405258526e-06, "epoch": 2.272992424699583, "total_flos": 2168823517254635520, "step": 565000 }, { "loss": 3.055, "learning_rate": 5.497442927189329e-06, "epoch": 2.2733947242437775, "total_flos": 2169209782657781760, "step": 565100 }, { "loss": 3.095, "learning_rate": 5.4966318017933985e-06, "epoch": 2.273797023787972, "total_flos": 2169582313188495360, "step": 565200 }, { "loss": 3.105, "learning_rate": 5.4958206763974685e-06, "epoch": 2.2741993233321667, "total_flos": 2169960038114119680, "step": 565300 }, { "loss": 3.075, "learning_rate": 5.495009551001538e-06, "epoch": 2.2746016228763613, "total_flos": 2170340036251422720, "step": 565400 }, { "loss": 3.105, "learning_rate": 5.494198425605607e-06, "epoch": 2.275003922420556, "total_flos": 2170724474587238400, "step": 565500 }, { "loss": 3.0425, "learning_rate": 5.493387300209676e-06, "epoch": 2.2754062219647504, "total_flos": 2171110134508769280, "step": 565600 }, { "loss": 3.1175, "learning_rate": 5.492576174813746e-06, "epoch": 2.275808521508945, "total_flos": 2171478118616125440, "step": 565700 }, { "loss": 3.0525, "learning_rate": 5.491765049417815e-06, "epoch": 2.27621082105314, "total_flos": 2171871867559587840, "step": 565800 }, { "loss": 3.065, "learning_rate": 5.4909539240218845e-06, "epoch": 2.276613120597334, "total_flos": 2172267980005847040, "step": 565900 }, { "loss": 3.0975, "learning_rate": 5.490142798625954e-06, "epoch": 2.277015420141529, "total_flos": 2172636920136806400, "step": 566000 }, { "loss": 3.04, "learning_rate": 5.489331673230024e-06, "epoch": 2.2774177196857237, "total_flos": 2173014198918082560, "step": 566100 }, { "loss": 3.03, "learning_rate": 5.488520547834093e-06, "epoch": 2.2778200192299183, "total_flos": 2173405217883033600, "step": 566200 }, { "loss": 3.0975, "learning_rate": 5.487709422438162e-06, "epoch": 2.278222318774113, "total_flos": 2173791286770216960, "step": 566300 }, { "loss": 3.1125, "learning_rate": 5.486898297042231e-06, "epoch": 2.2786246183183074, "total_flos": 2174175629503672320, "step": 566400 }, { "loss": 3.055, "learning_rate": 5.486087171646301e-06, "epoch": 2.279026917862502, "total_flos": 2174553928043458560, "step": 566500 }, { "loss": 3.0825, "learning_rate": 5.485276046250371e-06, "epoch": 2.2794292174066966, "total_flos": 2174949238492139520, "step": 566600 }, { "loss": 3.08, "learning_rate": 5.48446492085444e-06, "epoch": 2.279831516950891, "total_flos": 2175347682573742080, "step": 566700 }, { "loss": 3.0875, "learning_rate": 5.483653795458509e-06, "epoch": 2.2802338164950857, "total_flos": 2175723633544458240, "step": 566800 }, { "loss": 3.0775, "learning_rate": 5.482842670062579e-06, "epoch": 2.2806361160392807, "total_flos": 2176112129669345280, "step": 566900 }, { "loss": 3.0775, "learning_rate": 5.482031544666648e-06, "epoch": 2.2810384155834753, "total_flos": 2176511497907097600, "step": 567000 }, { "loss": 3.0325, "learning_rate": 5.4812204192707174e-06, "epoch": 2.28144071512767, "total_flos": 2176894836815769600, "step": 567100 }, { "loss": 3.09, "learning_rate": 5.480409293874787e-06, "epoch": 2.2818430146718645, "total_flos": 2177292659482030080, "step": 567200 }, { "loss": 3.025, "learning_rate": 5.479598168478857e-06, "epoch": 2.282245314216059, "total_flos": 2177667590694236160, "step": 567300 }, { "loss": 3.1125, "learning_rate": 5.478787043082926e-06, "epoch": 2.2826476137602536, "total_flos": 2178062667448258560, "step": 567400 }, { "loss": 3.1025, "learning_rate": 5.477975917686995e-06, "epoch": 2.283049913304448, "total_flos": 2178443510073077760, "step": 567500 }, { "loss": 3.04, "learning_rate": 5.477164792291064e-06, "epoch": 2.2834522128486427, "total_flos": 2178814006398013440, "step": 567600 }, { "loss": 3.0725, "learning_rate": 5.476353666895134e-06, "epoch": 2.2838545123928373, "total_flos": 2179190440691773440, "step": 567700 }, { "loss": 3.115, "learning_rate": 5.4755425414992035e-06, "epoch": 2.284256811937032, "total_flos": 2179570895595909120, "step": 567800 }, { "loss": 3.0725, "learning_rate": 5.474731416103273e-06, "epoch": 2.2846591114812265, "total_flos": 2179944971698114560, "step": 567900 }, { "loss": 3.0175, "learning_rate": 5.473920290707342e-06, "epoch": 2.2850614110254215, "total_flos": 2180331396438528000, "step": 568000 }, { "loss": 3.02, "learning_rate": 5.473109165311412e-06, "epoch": 2.285463710569616, "total_flos": 2180725772108574720, "step": 568100 }, { "loss": 3.0675, "learning_rate": 5.472298039915481e-06, "epoch": 2.2858660101138106, "total_flos": 2181107549512028160, "step": 568200 }, { "loss": 3.05, "learning_rate": 5.47148691451955e-06, "epoch": 2.286268309658005, "total_flos": 2181496672363499520, "step": 568300 }, { "loss": 3.06, "learning_rate": 5.4706757891236195e-06, "epoch": 2.2866706092021998, "total_flos": 2181859865730355200, "step": 568400 }, { "loss": 3.0425, "learning_rate": 5.4698646637276896e-06, "epoch": 2.2870729087463944, "total_flos": 2182238886599086080, "step": 568500 }, { "loss": 3.125, "learning_rate": 5.469053538331759e-06, "epoch": 2.287475208290589, "total_flos": 2182624955486269440, "step": 568600 }, { "loss": 3.075, "learning_rate": 5.468242412935828e-06, "epoch": 2.2878775078347835, "total_flos": 2183004279095808000, "step": 568700 }, { "loss": 3.0825, "learning_rate": 5.467431287539897e-06, "epoch": 2.288279807378978, "total_flos": 2183383034402426880, "step": 568800 }, { "loss": 3.1025, "learning_rate": 5.466620162143967e-06, "epoch": 2.288682106923173, "total_flos": 2183779194649866240, "step": 568900 }, { "loss": 3.1325, "learning_rate": 5.465809036748036e-06, "epoch": 2.2890844064673677, "total_flos": 2184151762359275520, "step": 569000 }, { "loss": 3.0675, "learning_rate": 5.464997911352106e-06, "epoch": 2.2894867060115622, "total_flos": 2184534501097574400, "step": 569100 }, { "loss": 3.105, "learning_rate": 5.464186785956175e-06, "epoch": 2.289889005555757, "total_flos": 2184905852532510720, "step": 569200 }, { "loss": 3.0925, "learning_rate": 5.463375660560245e-06, "epoch": 2.2902913050999514, "total_flos": 2185288288530001920, "step": 569300 }, { "loss": 3.05, "learning_rate": 5.462564535164314e-06, "epoch": 2.290693604644146, "total_flos": 2185667739609354240, "step": 569400 }, { "loss": 2.9875, "learning_rate": 5.461753409768383e-06, "epoch": 2.2910959041883405, "total_flos": 2186053627914301440, "step": 569500 }, { "loss": 3.045, "learning_rate": 5.460942284372453e-06, "epoch": 2.291498203732535, "total_flos": 2186431018231664640, "step": 569600 }, { "loss": 3.11, "learning_rate": 5.4601311589765225e-06, "epoch": 2.2919005032767297, "total_flos": 2186814633324933120, "step": 569700 }, { "loss": 3.08, "learning_rate": 5.459320033580592e-06, "epoch": 2.2923028028209242, "total_flos": 2187207649316966400, "step": 569800 }, { "loss": 3.075, "learning_rate": 5.458508908184661e-06, "epoch": 2.292705102365119, "total_flos": 2187598843552911360, "step": 569900 }, { "loss": 3.05, "learning_rate": 5.457697782788731e-06, "epoch": 2.293107401909314, "total_flos": 2188000931146690560, "step": 570000 }, { "loss": 3.0825, "learning_rate": 5.4568866573928e-06, "epoch": 2.2935097014535084, "total_flos": 2188375219698585600, "step": 570100 }, { "loss": 3.0075, "learning_rate": 5.456075531996869e-06, "epoch": 2.293912000997703, "total_flos": 2188759381849804800, "step": 570200 }, { "loss": 3.0225, "learning_rate": 5.4552644066009385e-06, "epoch": 2.2943143005418976, "total_flos": 2189157799375196160, "step": 570300 }, { "loss": 3.0625, "learning_rate": 5.4544532812050085e-06, "epoch": 2.294716600086092, "total_flos": 2189534929441689600, "step": 570400 }, { "loss": 3.045, "learning_rate": 5.453642155809078e-06, "epoch": 2.2951188996302867, "total_flos": 2189935678602424320, "step": 570500 }, { "loss": 3.055, "learning_rate": 5.452831030413147e-06, "epoch": 2.2955211991744813, "total_flos": 2190315060635627520, "step": 570600 }, { "loss": 3.07, "learning_rate": 5.452019905017216e-06, "epoch": 2.295923498718676, "total_flos": 2190704029461073920, "step": 570700 }, { "loss": 3.125, "learning_rate": 5.451208779621286e-06, "epoch": 2.2963257982628704, "total_flos": 2191087570196951040, "step": 570800 }, { "loss": 3.12, "learning_rate": 5.450397654225355e-06, "epoch": 2.296728097807065, "total_flos": 2191475779514757120, "step": 570900 }, { "loss": 3.055, "learning_rate": 5.4495865288294245e-06, "epoch": 2.2971303973512596, "total_flos": 2191859182158336000, "step": 571000 }, { "loss": 3.07, "learning_rate": 5.448775403433494e-06, "epoch": 2.2975326968954546, "total_flos": 2192231378080788480, "step": 571100 }, { "loss": 3.085, "learning_rate": 5.447964278037564e-06, "epoch": 2.297934996439649, "total_flos": 2192613150172999680, "step": 571200 }, { "loss": 3.12, "learning_rate": 5.447153152641633e-06, "epoch": 2.2983372959838437, "total_flos": 2192981113035386880, "step": 571300 }, { "loss": 3.12, "learning_rate": 5.446342027245702e-06, "epoch": 2.2987395955280383, "total_flos": 2193340099898388480, "step": 571400 }, { "loss": 3.0275, "learning_rate": 5.445530901849771e-06, "epoch": 2.299141895072233, "total_flos": 2193696856039649280, "step": 571500 }, { "loss": 3.105, "learning_rate": 5.444719776453841e-06, "epoch": 2.2995441946164275, "total_flos": 2194084093400125440, "step": 571600 }, { "loss": 3.0575, "learning_rate": 5.443908651057911e-06, "epoch": 2.299946494160622, "total_flos": 2194468027167928320, "step": 571700 }, { "loss": 3.0925, "learning_rate": 5.44309752566198e-06, "epoch": 2.3003487937048166, "total_flos": 2194856746364989440, "step": 571800 }, { "loss": 3.0875, "learning_rate": 5.442286400266049e-06, "epoch": 2.300751093249011, "total_flos": 2195237620857262080, "step": 571900 }, { "loss": 3.0625, "learning_rate": 5.441475274870119e-06, "epoch": 2.301153392793206, "total_flos": 2195644881600983040, "step": 572000 }, { "loss": 3.075, "learning_rate": 5.440664149474188e-06, "epoch": 2.3015556923374008, "total_flos": 2196022356898222080, "step": 572100 }, { "loss": 3.12, "learning_rate": 5.4398530240782574e-06, "epoch": 2.3019579918815953, "total_flos": 2196395068011171840, "step": 572200 }, { "loss": 3.08, "learning_rate": 5.439041898682327e-06, "epoch": 2.30236029142579, "total_flos": 2196787850308546560, "step": 572300 }, { "loss": 3.0275, "learning_rate": 5.438230773286397e-06, "epoch": 2.3027625909699845, "total_flos": 2197159212365967360, "step": 572400 }, { "loss": 3.0375, "learning_rate": 5.437419647890466e-06, "epoch": 2.303164890514179, "total_flos": 2197539284860661760, "step": 572500 }, { "loss": 3.0725, "learning_rate": 5.436608522494535e-06, "epoch": 2.3035671900583736, "total_flos": 2197919962836971520, "step": 572600 }, { "loss": 3.0425, "learning_rate": 5.435797397098604e-06, "epoch": 2.303969489602568, "total_flos": 2198314455354347520, "step": 572700 }, { "loss": 3.0675, "learning_rate": 5.434986271702674e-06, "epoch": 2.3043717891467628, "total_flos": 2198691787248046080, "step": 572800 }, { "loss": 3.06, "learning_rate": 5.4341751463067435e-06, "epoch": 2.3047740886909573, "total_flos": 2199069060718080000, "step": 572900 }, { "loss": 3.055, "learning_rate": 5.433364020910813e-06, "epoch": 2.305176388235152, "total_flos": 2199456611441848320, "step": 573000 }, { "loss": 3.055, "learning_rate": 5.432552895514882e-06, "epoch": 2.305578687779347, "total_flos": 2199840019396669440, "step": 573100 }, { "loss": 3.0825, "learning_rate": 5.431741770118952e-06, "epoch": 2.3059809873235415, "total_flos": 2200230077026775040, "step": 573200 }, { "loss": 3.06, "learning_rate": 5.430930644723021e-06, "epoch": 2.306383286867736, "total_flos": 2200611312683520000, "step": 573300 }, { "loss": 3.0775, "learning_rate": 5.43011951932709e-06, "epoch": 2.3067855864119307, "total_flos": 2201002358204682240, "step": 573400 }, { "loss": 3.055, "learning_rate": 5.4293083939311595e-06, "epoch": 2.3071878859561252, "total_flos": 2201379313000181760, "step": 573500 }, { "loss": 3.0725, "learning_rate": 5.4284972685352296e-06, "epoch": 2.30759018550032, "total_flos": 2201765652760719360, "step": 573600 }, { "loss": 3.05, "learning_rate": 5.427686143139299e-06, "epoch": 2.3079924850445144, "total_flos": 2202149363456348160, "step": 573700 }, { "loss": 3.1225, "learning_rate": 5.426875017743368e-06, "epoch": 2.308394784588709, "total_flos": 2202524592098119680, "step": 573800 }, { "loss": 3.085, "learning_rate": 5.426063892347437e-06, "epoch": 2.3087970841329035, "total_flos": 2202912243735490560, "step": 573900 }, { "loss": 3.0725, "learning_rate": 5.425252766951507e-06, "epoch": 2.3091993836770985, "total_flos": 2203293659974471680, "step": 574000 }, { "loss": 3.1175, "learning_rate": 5.424441641555576e-06, "epoch": 2.3096016832212927, "total_flos": 2203663232143257600, "step": 574100 }, { "loss": 3.03, "learning_rate": 5.423630516159646e-06, "epoch": 2.3100039827654877, "total_flos": 2204047612055408640, "step": 574200 }, { "loss": 3.0775, "learning_rate": 5.4228193907637165e-06, "epoch": 2.3104062823096823, "total_flos": 2204432958613647360, "step": 574300 }, { "loss": 3.0325, "learning_rate": 5.422008265367785e-06, "epoch": 2.310808581853877, "total_flos": 2204827174946426880, "step": 574400 }, { "loss": 3.06, "learning_rate": 5.421197139971854e-06, "epoch": 2.3112108813980714, "total_flos": 2205228248092938240, "step": 574500 }, { "loss": 3.0325, "learning_rate": 5.420386014575923e-06, "epoch": 2.311613180942266, "total_flos": 2205625359052738560, "step": 574600 }, { "loss": 3.0225, "learning_rate": 5.419574889179994e-06, "epoch": 2.3120154804864606, "total_flos": 2206012819485388800, "step": 574700 }, { "loss": 3.105, "learning_rate": 5.418763763784063e-06, "epoch": 2.312417780030655, "total_flos": 2206405893901086720, "step": 574800 }, { "loss": 3.015, "learning_rate": 5.417952638388132e-06, "epoch": 2.3128200795748497, "total_flos": 2206792977235537920, "step": 574900 }, { "loss": 3.0825, "learning_rate": 5.417141512992201e-06, "epoch": 2.3132223791190443, "total_flos": 2207166123870351360, "step": 575000 }, { "loss": 3.095, "learning_rate": 5.416330387596272e-06, "epoch": 2.3136246786632393, "total_flos": 2207553674594119680, "step": 575100 }, { "loss": 3.0975, "learning_rate": 5.415519262200341e-06, "epoch": 2.314026978207434, "total_flos": 2207931951888936960, "step": 575200 }, { "loss": 3.0625, "learning_rate": 5.414708136804409e-06, "epoch": 2.3144292777516284, "total_flos": 2208326895861903360, "step": 575300 }, { "loss": 3.12, "learning_rate": 5.4138970114084785e-06, "epoch": 2.314831577295823, "total_flos": 2208722955195740160, "step": 575400 }, { "loss": 3.095, "learning_rate": 5.413085886012549e-06, "epoch": 2.3152338768400176, "total_flos": 2209103654417018880, "step": 575500 }, { "loss": 3.0725, "learning_rate": 5.4122747606166186e-06, "epoch": 2.315636176384212, "total_flos": 2209489155001282560, "step": 575600 }, { "loss": 3.0825, "learning_rate": 5.411463635220687e-06, "epoch": 2.3160384759284067, "total_flos": 2209869493058088960, "step": 575700 }, { "loss": 3.1325, "learning_rate": 5.410652509824756e-06, "epoch": 2.3164407754726013, "total_flos": 2210238512857681920, "step": 575800 }, { "loss": 3.07, "learning_rate": 5.409841384428827e-06, "epoch": 2.316843075016796, "total_flos": 2210634136669655040, "step": 575900 }, { "loss": 3.035, "learning_rate": 5.409030259032896e-06, "epoch": 2.3172453745609904, "total_flos": 2211014363190374400, "step": 576000 }, { "loss": 3.065, "learning_rate": 5.4082191336369645e-06, "epoch": 2.317647674105185, "total_flos": 2211396337109790720, "step": 576100 }, { "loss": 3.0425, "learning_rate": 5.407408008241034e-06, "epoch": 2.31804997364938, "total_flos": 2211785444027535360, "step": 576200 }, { "loss": 3.0175, "learning_rate": 5.406596882845105e-06, "epoch": 2.3184522731935746, "total_flos": 2212183160468951040, "step": 576300 }, { "loss": 3.12, "learning_rate": 5.405785757449174e-06, "epoch": 2.318854572737769, "total_flos": 2212544739218165760, "step": 576400 }, { "loss": 3.055, "learning_rate": 5.404974632053242e-06, "epoch": 2.3192568722819638, "total_flos": 2212925454373171200, "step": 576500 }, { "loss": 3.0275, "learning_rate": 5.404163506657311e-06, "epoch": 2.3196591718261583, "total_flos": 2213319256429056000, "step": 576600 }, { "loss": 3.0875, "learning_rate": 5.403352381261382e-06, "epoch": 2.320061471370353, "total_flos": 2213703684142387200, "step": 576700 }, { "loss": 3.1375, "learning_rate": 5.4025412558654515e-06, "epoch": 2.3204637709145475, "total_flos": 2214092169644789760, "step": 576800 }, { "loss": 3.0875, "learning_rate": 5.40173013046952e-06, "epoch": 2.320866070458742, "total_flos": 2214470808104079360, "step": 576900 }, { "loss": 3.0625, "learning_rate": 5.400919005073589e-06, "epoch": 2.3212683700029366, "total_flos": 2214863553222758400, "step": 577000 }, { "loss": 3.0725, "learning_rate": 5.40010787967766e-06, "epoch": 2.3216706695471316, "total_flos": 2215243923147018240, "step": 577100 }, { "loss": 3.0625, "learning_rate": 5.399296754281729e-06, "epoch": 2.322072969091326, "total_flos": 2215628143721902080, "step": 577200 }, { "loss": 3.0775, "learning_rate": 5.3984856288857974e-06, "epoch": 2.322475268635521, "total_flos": 2216007127411937280, "step": 577300 }, { "loss": 3.0925, "learning_rate": 5.397674503489867e-06, "epoch": 2.3228775681797154, "total_flos": 2216390099844894720, "step": 577400 }, { "loss": 3.0525, "learning_rate": 5.3968633780939375e-06, "epoch": 2.32327986772391, "total_flos": 2216768292159836160, "step": 577500 }, { "loss": 3.0775, "learning_rate": 5.396052252698007e-06, "epoch": 2.3236821672681045, "total_flos": 2217136865815080960, "step": 577600 }, { "loss": 3.0825, "learning_rate": 5.395241127302076e-06, "epoch": 2.324084466812299, "total_flos": 2217507983555358720, "step": 577700 }, { "loss": 3.0725, "learning_rate": 5.394430001906144e-06, "epoch": 2.3244867663564936, "total_flos": 2217884752457379840, "step": 577800 }, { "loss": 3.0825, "learning_rate": 5.393618876510215e-06, "epoch": 2.3248890659006882, "total_flos": 2218275792667299840, "step": 577900 }, { "loss": 3.0875, "learning_rate": 5.392807751114284e-06, "epoch": 2.325291365444883, "total_flos": 2218643585569935360, "step": 578000 }, { "loss": 3.075, "learning_rate": 5.3919966257183535e-06, "epoch": 2.3256936649890774, "total_flos": 2219022765775933440, "step": 578100 }, { "loss": 3.075, "learning_rate": 5.391185500322422e-06, "epoch": 2.3260959645332724, "total_flos": 2219405573560381440, "step": 578200 }, { "loss": 3.0625, "learning_rate": 5.390374374926493e-06, "epoch": 2.326498264077467, "total_flos": 2219804846195773440, "step": 578300 }, { "loss": 2.9725, "learning_rate": 5.389563249530562e-06, "epoch": 2.3269005636216615, "total_flos": 2220194022159667200, "step": 578400 }, { "loss": 3.0775, "learning_rate": 5.388752124134631e-06, "epoch": 2.327302863165856, "total_flos": 2220559711810375680, "step": 578500 }, { "loss": 3.0825, "learning_rate": 5.3879409987386995e-06, "epoch": 2.3277051627100507, "total_flos": 2220927642805309440, "step": 578600 }, { "loss": 3.0675, "learning_rate": 5.38712987334277e-06, "epoch": 2.3281074622542453, "total_flos": 2221307226665717760, "step": 578700 }, { "loss": 3.0525, "learning_rate": 5.38631874794684e-06, "epoch": 2.32850976179844, "total_flos": 2221691654379048960, "step": 578800 }, { "loss": 3.06, "learning_rate": 5.385507622550909e-06, "epoch": 2.3289120613426344, "total_flos": 2222086130962698240, "step": 578900 }, { "loss": 3.095, "learning_rate": 5.384696497154979e-06, "epoch": 2.329314360886829, "total_flos": 2222468582893916160, "step": 579000 }, { "loss": 3.0425, "learning_rate": 5.383885371759048e-06, "epoch": 2.3297166604310235, "total_flos": 2222851040136376320, "step": 579100 }, { "loss": 3.1, "learning_rate": 5.383074246363117e-06, "epoch": 2.330118959975218, "total_flos": 2223242712384122880, "step": 579200 }, { "loss": 3.085, "learning_rate": 5.3822631209671864e-06, "epoch": 2.330521259519413, "total_flos": 2223620442620989440, "step": 579300 }, { "loss": 3.1, "learning_rate": 5.3814519955712565e-06, "epoch": 2.3309235590636077, "total_flos": 2224010617098424320, "step": 579400 }, { "loss": 3.0725, "learning_rate": 5.380640870175326e-06, "epoch": 2.3313258586078023, "total_flos": 2224382685551063040, "step": 579500 }, { "loss": 3.1125, "learning_rate": 5.379829744779395e-06, "epoch": 2.331728158151997, "total_flos": 2224773396463964160, "step": 579600 }, { "loss": 3.0425, "learning_rate": 5.379018619383464e-06, "epoch": 2.3321304576961914, "total_flos": 2225180673141411840, "step": 579700 }, { "loss": 3.09, "learning_rate": 5.378207493987534e-06, "epoch": 2.332532757240386, "total_flos": 2225548657248768000, "step": 579800 }, { "loss": 3.08, "learning_rate": 5.377396368591603e-06, "epoch": 2.3329350567845806, "total_flos": 2225920486695505920, "step": 579900 }, { "loss": 3.08, "learning_rate": 5.3765852431956725e-06, "epoch": 2.333337356328775, "total_flos": 2226303942451507200, "step": 580000 }, { "loss": 2.99, "learning_rate": 5.375774117799742e-06, "epoch": 2.3337396558729697, "total_flos": 2226705897264230400, "step": 580100 }, { "loss": 3.0575, "learning_rate": 5.374962992403812e-06, "epoch": 2.3341419554171647, "total_flos": 2227080605404262400, "step": 580200 }, { "loss": 3.0925, "learning_rate": 5.374151867007881e-06, "epoch": 2.3345442549613593, "total_flos": 2227462228781690880, "step": 580300 }, { "loss": 3.085, "learning_rate": 5.37334074161195e-06, "epoch": 2.334946554505554, "total_flos": 2227843767179243520, "step": 580400 }, { "loss": 3.0475, "learning_rate": 5.372529616216019e-06, "epoch": 2.3353488540497485, "total_flos": 2228226399692697600, "step": 580500 }, { "loss": 3.0375, "learning_rate": 5.371718490820089e-06, "epoch": 2.335751153593943, "total_flos": 2228612240196464640, "step": 580600 }, { "loss": 3.08, "learning_rate": 5.3709073654241586e-06, "epoch": 2.3361534531381376, "total_flos": 2228985599280967680, "step": 580700 }, { "loss": 3.0975, "learning_rate": 5.370096240028228e-06, "epoch": 2.336555752682332, "total_flos": 2229364285541437440, "step": 580800 }, { "loss": 3.0775, "learning_rate": 5.369285114632297e-06, "epoch": 2.3369580522265267, "total_flos": 2229740799503831040, "step": 580900 }, { "loss": 3.0525, "learning_rate": 5.368473989236367e-06, "epoch": 2.3373603517707213, "total_flos": 2230118970573803520, "step": 581000 }, { "loss": 3.04, "learning_rate": 5.367662863840436e-06, "epoch": 2.337762651314916, "total_flos": 2230492042851225600, "step": 581100 }, { "loss": 2.9725, "learning_rate": 5.366851738444505e-06, "epoch": 2.3381649508591105, "total_flos": 2230883359245742080, "step": 581200 }, { "loss": 3.0675, "learning_rate": 5.366040613048575e-06, "epoch": 2.3385672504033055, "total_flos": 2231276178721812480, "step": 581300 }, { "loss": 3.05, "learning_rate": 5.365229487652645e-06, "epoch": 2.3389695499475, "total_flos": 2231667181753036800, "step": 581400 }, { "loss": 3.045, "learning_rate": 5.364418362256714e-06, "epoch": 2.3393718494916946, "total_flos": 2232070182880481280, "step": 581500 }, { "loss": 3.065, "learning_rate": 5.363607236860783e-06, "epoch": 2.339774149035889, "total_flos": 2232456214588968960, "step": 581600 }, { "loss": 3.07, "learning_rate": 5.362796111464852e-06, "epoch": 2.340176448580084, "total_flos": 2232850027267338240, "step": 581700 }, { "loss": 3.0525, "learning_rate": 5.361984986068922e-06, "epoch": 2.3405787481242784, "total_flos": 2233239851202785280, "step": 581800 }, { "loss": 3.035, "learning_rate": 5.3611738606729915e-06, "epoch": 2.340981047668473, "total_flos": 2233636813447802880, "step": 581900 }, { "loss": 3.0125, "learning_rate": 5.360362735277061e-06, "epoch": 2.3413833472126675, "total_flos": 2234030153425612800, "step": 582000 }, { "loss": 3.1, "learning_rate": 5.35955160988113e-06, "epoch": 2.341785646756862, "total_flos": 2234408106734653440, "step": 582100 }, { "loss": 3.1375, "learning_rate": 5.3587404844852e-06, "epoch": 2.342187946301057, "total_flos": 2234799906452213760, "step": 582200 }, { "loss": 3.005, "learning_rate": 5.357929359089269e-06, "epoch": 2.342590245845251, "total_flos": 2235173674502369280, "step": 582300 }, { "loss": 3.07, "learning_rate": 5.357118233693338e-06, "epoch": 2.3429925453894462, "total_flos": 2235551266646937600, "step": 582400 }, { "loss": 3.0275, "learning_rate": 5.3563071082974075e-06, "epoch": 2.343394844933641, "total_flos": 2235926612136038400, "step": 582500 }, { "loss": 3.0575, "learning_rate": 5.3554959829014775e-06, "epoch": 2.3437971444778354, "total_flos": 2236296641071656960, "step": 582600 }, { "loss": 3.1025, "learning_rate": 5.354684857505547e-06, "epoch": 2.34419944402203, "total_flos": 2236682508131635200, "step": 582700 }, { "loss": 3.105, "learning_rate": 5.353873732109616e-06, "epoch": 2.3446017435662245, "total_flos": 2237056833862225920, "step": 582800 }, { "loss": 3.08, "learning_rate": 5.353062606713685e-06, "epoch": 2.345004043110419, "total_flos": 2237441304065495040, "step": 582900 }, { "loss": 3.0575, "learning_rate": 5.352251481317755e-06, "epoch": 2.3454063426546137, "total_flos": 2237828567982182400, "step": 583000 }, { "loss": 3.085, "learning_rate": 5.351440355921824e-06, "epoch": 2.3458086421988082, "total_flos": 2238223469465210880, "step": 583100 }, { "loss": 3.02, "learning_rate": 5.3506292305258935e-06, "epoch": 2.346210941743003, "total_flos": 2238608980671959040, "step": 583200 }, { "loss": 3.015, "learning_rate": 5.349818105129963e-06, "epoch": 2.346613241287198, "total_flos": 2238994316607713280, "step": 583300 }, { "loss": 3.0625, "learning_rate": 5.349006979734033e-06, "epoch": 2.3470155408313924, "total_flos": 2239375106120110080, "step": 583400 }, { "loss": 3.055, "learning_rate": 5.348195854338102e-06, "epoch": 2.347417840375587, "total_flos": 2239762662155120640, "step": 583500 }, { "loss": 2.995, "learning_rate": 5.347384728942171e-06, "epoch": 2.3478201399197816, "total_flos": 2240152108992368640, "step": 583600 }, { "loss": 3.0575, "learning_rate": 5.346573603546241e-06, "epoch": 2.348222439463976, "total_flos": 2240544498257817600, "step": 583700 }, { "loss": 3.06, "learning_rate": 5.34576247815031e-06, "epoch": 2.3486247390081707, "total_flos": 2240909231884922880, "step": 583800 }, { "loss": 3.0675, "learning_rate": 5.34495135275438e-06, "epoch": 2.3490270385523653, "total_flos": 2241310783043235840, "step": 583900 }, { "loss": 3.1125, "learning_rate": 5.344140227358449e-06, "epoch": 2.34942933809656, "total_flos": 2241695083286753280, "step": 584000 }, { "loss": 3.11, "learning_rate": 5.343329101962519e-06, "epoch": 2.3498316376407544, "total_flos": 2242093245872517120, "step": 584100 }, { "loss": 3.045, "learning_rate": 5.342517976566588e-06, "epoch": 2.350233937184949, "total_flos": 2242487499383992320, "step": 584200 }, { "loss": 3.095, "learning_rate": 5.341706851170657e-06, "epoch": 2.3506362367291436, "total_flos": 2242863843386634240, "step": 584300 }, { "loss": 3.0975, "learning_rate": 5.3408957257747264e-06, "epoch": 2.3510385362733386, "total_flos": 2243251420666613760, "step": 584400 }, { "loss": 3.0225, "learning_rate": 5.3400846003787965e-06, "epoch": 2.351440835817533, "total_flos": 2243632651012116480, "step": 584500 }, { "loss": 3.12, "learning_rate": 5.339273474982866e-06, "epoch": 2.3518431353617277, "total_flos": 2244005967606681600, "step": 584600 }, { "loss": 3.1425, "learning_rate": 5.338462349586935e-06, "epoch": 2.3522454349059223, "total_flos": 2244396173951569920, "step": 584700 }, { "loss": 3.0775, "learning_rate": 5.337651224191004e-06, "epoch": 2.352647734450117, "total_flos": 2244788998738882560, "step": 584800 }, { "loss": 3.085, "learning_rate": 5.336840098795074e-06, "epoch": 2.3530500339943115, "total_flos": 2245174483389419520, "step": 584900 }, { "loss": 3.1275, "learning_rate": 5.336028973399143e-06, "epoch": 2.353452333538506, "total_flos": 2245573665733693440, "step": 585000 }, { "loss": 3.0825, "learning_rate": 5.3352178480032125e-06, "epoch": 2.3538546330827006, "total_flos": 2245958704239882240, "step": 585100 }, { "loss": 3.025, "learning_rate": 5.334406722607282e-06, "epoch": 2.354256932626895, "total_flos": 2246340407285944320, "step": 585200 }, { "loss": 3.0625, "learning_rate": 5.333595597211352e-06, "epoch": 2.35465923217109, "total_flos": 2246727001986109440, "step": 585300 }, { "loss": 3.1275, "learning_rate": 5.332784471815421e-06, "epoch": 2.3550615317152843, "total_flos": 2247102687394713600, "step": 585400 }, { "loss": 3.0725, "learning_rate": 5.33197334641949e-06, "epoch": 2.3554638312594793, "total_flos": 2247468653230018560, "step": 585500 }, { "loss": 3.05, "learning_rate": 5.331162221023559e-06, "epoch": 2.355866130803674, "total_flos": 2247842787755888640, "step": 585600 }, { "loss": 3.0475, "learning_rate": 5.330351095627629e-06, "epoch": 2.3562684303478685, "total_flos": 2248214362262999040, "step": 585700 }, { "loss": 3.0975, "learning_rate": 5.3295399702316986e-06, "epoch": 2.356670729892063, "total_flos": 2248609237189816320, "step": 585800 }, { "loss": 3.105, "learning_rate": 5.328728844835768e-06, "epoch": 2.3570730294362576, "total_flos": 2248981889879101440, "step": 585900 }, { "loss": 3.1, "learning_rate": 5.327917719439837e-06, "epoch": 2.357475328980452, "total_flos": 2249362212002181120, "step": 586000 }, { "loss": 3.0325, "learning_rate": 5.327106594043907e-06, "epoch": 2.3578776285246468, "total_flos": 2249750623147192320, "step": 586100 }, { "loss": 3.0275, "learning_rate": 5.326295468647976e-06, "epoch": 2.3582799280688413, "total_flos": 2250147075512954880, "step": 586200 }, { "loss": 3.1125, "learning_rate": 5.325484343252045e-06, "epoch": 2.358682227613036, "total_flos": 2250532719500759040, "step": 586300 }, { "loss": 3.08, "learning_rate": 5.324673217856115e-06, "epoch": 2.359084527157231, "total_flos": 2250925071587512320, "step": 586400 }, { "loss": 3.06, "learning_rate": 5.323862092460185e-06, "epoch": 2.3594868267014255, "total_flos": 2251307725345935360, "step": 586500 }, { "loss": 3.025, "learning_rate": 5.323050967064254e-06, "epoch": 2.35988912624562, "total_flos": 2251695297314672640, "step": 586600 }, { "loss": 3.07, "learning_rate": 5.322239841668323e-06, "epoch": 2.3602914257898147, "total_flos": 2252094145050685440, "step": 586700 }, { "loss": 3.035, "learning_rate": 5.321428716272392e-06, "epoch": 2.3606937253340092, "total_flos": 2252489869776261120, "step": 586800 }, { "loss": 3.0825, "learning_rate": 5.320617590876462e-06, "epoch": 2.361096024878204, "total_flos": 2252874026616238080, "step": 586900 }, { "loss": 3.0825, "learning_rate": 5.3198064654805315e-06, "epoch": 2.3614983244223984, "total_flos": 2253262581164789760, "step": 587000 }, { "loss": 3.035, "learning_rate": 5.318995340084601e-06, "epoch": 2.361900623966593, "total_flos": 2253641368338862080, "step": 587100 }, { "loss": 3.1025, "learning_rate": 5.31818421468867e-06, "epoch": 2.3623029235107875, "total_flos": 2253999749720248320, "step": 587200 }, { "loss": 3.0525, "learning_rate": 5.31737308929274e-06, "epoch": 2.362705223054982, "total_flos": 2254382196340224000, "step": 587300 }, { "loss": 3.05, "learning_rate": 5.316561963896809e-06, "epoch": 2.3631075225991767, "total_flos": 2254760850733240320, "step": 587400 }, { "loss": 3.08, "learning_rate": 5.315750838500878e-06, "epoch": 2.3635098221433717, "total_flos": 2255162221309317120, "step": 587500 }, { "loss": 3.07, "learning_rate": 5.3149397131049475e-06, "epoch": 2.3639121216875663, "total_flos": 2255551052042465280, "step": 587600 }, { "loss": 3.0825, "learning_rate": 5.3141285877090175e-06, "epoch": 2.364314421231761, "total_flos": 2255941375234682880, "step": 587700 }, { "loss": 3.055, "learning_rate": 5.313317462313087e-06, "epoch": 2.3647167207759554, "total_flos": 2256326923620126720, "step": 587800 }, { "loss": 3.035, "learning_rate": 5.312506336917156e-06, "epoch": 2.36511902032015, "total_flos": 2256705933866373120, "step": 587900 }, { "loss": 3.04, "learning_rate": 5.311695211521225e-06, "epoch": 2.3655213198643446, "total_flos": 2257092634791383040, "step": 588000 }, { "loss": 3.055, "learning_rate": 5.310884086125295e-06, "epoch": 2.365923619408539, "total_flos": 2257483170433290240, "step": 588100 }, { "loss": 3.0225, "learning_rate": 5.310072960729364e-06, "epoch": 2.3663259189527337, "total_flos": 2257858128201707520, "step": 588200 }, { "loss": 3.09, "learning_rate": 5.3092618353334335e-06, "epoch": 2.3667282184969283, "total_flos": 2258245174357463040, "step": 588300 }, { "loss": 3.0075, "learning_rate": 5.308450709937504e-06, "epoch": 2.3671305180411233, "total_flos": 2258620025901035520, "step": 588400 }, { "loss": 3.06, "learning_rate": 5.307639584541573e-06, "epoch": 2.367532817585318, "total_flos": 2259005101585920000, "step": 588500 }, { "loss": 3.05, "learning_rate": 5.306828459145642e-06, "epoch": 2.3679351171295124, "total_flos": 2259385301550428160, "step": 588600 }, { "loss": 3.075, "learning_rate": 5.306017333749711e-06, "epoch": 2.368337416673707, "total_flos": 2259770318811648000, "step": 588700 }, { "loss": 3.0325, "learning_rate": 5.305206208353781e-06, "epoch": 2.3687397162179016, "total_flos": 2260148649218887680, "step": 588800 }, { "loss": 3.035, "learning_rate": 5.30439508295785e-06, "epoch": 2.369142015762096, "total_flos": 2260511736360898560, "step": 588900 }, { "loss": 3.0675, "learning_rate": 5.30358395756192e-06, "epoch": 2.3695443153062907, "total_flos": 2260889572822609920, "step": 589000 }, { "loss": 3.02, "learning_rate": 5.302772832165989e-06, "epoch": 2.3699466148504853, "total_flos": 2261256813356052480, "step": 589100 }, { "loss": 3.0525, "learning_rate": 5.301961706770059e-06, "epoch": 2.37034891439468, "total_flos": 2261641442896588800, "step": 589200 }, { "loss": 3.0675, "learning_rate": 5.301150581374128e-06, "epoch": 2.3707512139388744, "total_flos": 2262024765871534080, "step": 589300 }, { "loss": 3.07, "learning_rate": 5.300339455978197e-06, "epoch": 2.371153513483069, "total_flos": 2262414818190397440, "step": 589400 }, { "loss": 3.1025, "learning_rate": 5.2995283305822664e-06, "epoch": 2.371555813027264, "total_flos": 2262800892388823040, "step": 589500 }, { "loss": 3.0925, "learning_rate": 5.2987172051863365e-06, "epoch": 2.3719581125714586, "total_flos": 2263187295884267520, "step": 589600 }, { "loss": 3.075, "learning_rate": 5.297906079790406e-06, "epoch": 2.372360412115653, "total_flos": 2263582000851333120, "step": 589700 }, { "loss": 3.075, "learning_rate": 5.297094954394475e-06, "epoch": 2.3727627116598478, "total_flos": 2263946506095022080, "step": 589800 }, { "loss": 3.0375, "learning_rate": 5.296283828998544e-06, "epoch": 2.3731650112040423, "total_flos": 2264329972473507840, "step": 589900 }, { "loss": 3.0225, "learning_rate": 5.295472703602614e-06, "epoch": 2.373567310748237, "total_flos": 2264736388729712640, "step": 590000 }, { "loss": 3.0625, "learning_rate": 5.294661578206683e-06, "epoch": 2.3739696102924315, "total_flos": 2265115876987760640, "step": 590100 }, { "loss": 3.0825, "learning_rate": 5.2938504528107525e-06, "epoch": 2.374371909836626, "total_flos": 2265494010879037440, "step": 590200 }, { "loss": 3.0275, "learning_rate": 5.293039327414822e-06, "epoch": 2.3747742093808206, "total_flos": 2265891939770142720, "step": 590300 }, { "loss": 3.0375, "learning_rate": 5.292228202018892e-06, "epoch": 2.375176508925015, "total_flos": 2266296008457277440, "step": 590400 }, { "loss": 3.085, "learning_rate": 5.291417076622961e-06, "epoch": 2.3755788084692098, "total_flos": 2266686315715768320, "step": 590500 }, { "loss": 3.1125, "learning_rate": 5.29060595122703e-06, "epoch": 2.375981108013405, "total_flos": 2267057794620518400, "step": 590600 }, { "loss": 3.025, "learning_rate": 5.289794825831099e-06, "epoch": 2.3763834075575994, "total_flos": 2267457407175413760, "step": 590700 }, { "loss": 3.08, "learning_rate": 5.288983700435169e-06, "epoch": 2.376785707101794, "total_flos": 2267846083882536960, "step": 590800 }, { "loss": 3.025, "learning_rate": 5.2881725750392386e-06, "epoch": 2.3771880066459885, "total_flos": 2268239487595253760, "step": 590900 }, { "loss": 3.04, "learning_rate": 5.287361449643308e-06, "epoch": 2.377590306190183, "total_flos": 2268634808666419200, "step": 591000 }, { "loss": 2.985, "learning_rate": 5.286550324247377e-06, "epoch": 2.3779926057343777, "total_flos": 2269018891149004800, "step": 591100 }, { "loss": 3.0575, "learning_rate": 5.285739198851447e-06, "epoch": 2.3783949052785722, "total_flos": 2269402654957056000, "step": 591200 }, { "loss": 3.0825, "learning_rate": 5.284928073455516e-06, "epoch": 2.378797204822767, "total_flos": 2269802134730895360, "step": 591300 }, { "loss": 3.0675, "learning_rate": 5.284116948059585e-06, "epoch": 2.3791995043669614, "total_flos": 2270188766609756160, "step": 591400 }, { "loss": 3.1, "learning_rate": 5.283305822663655e-06, "epoch": 2.3796018039111564, "total_flos": 2270568525741158400, "step": 591500 }, { "loss": 3.065, "learning_rate": 5.282494697267725e-06, "epoch": 2.380004103455351, "total_flos": 2270931411055964160, "step": 591600 }, { "loss": 3.0925, "learning_rate": 5.281683571871794e-06, "epoch": 2.3804064029995455, "total_flos": 2271311908450037760, "step": 591700 }, { "loss": 3.03, "learning_rate": 5.280872446475863e-06, "epoch": 2.38080870254374, "total_flos": 2271673195080929280, "step": 591800 }, { "loss": 3.1, "learning_rate": 5.280061321079932e-06, "epoch": 2.3812110020879347, "total_flos": 2272076074049802240, "step": 591900 }, { "loss": 3.0525, "learning_rate": 5.279250195684002e-06, "epoch": 2.3816133016321293, "total_flos": 2272456959164559360, "step": 592000 }, { "loss": 3.0075, "learning_rate": 5.2784390702880715e-06, "epoch": 2.382015601176324, "total_flos": 2272846475047956480, "step": 592100 }, { "loss": 3.0525, "learning_rate": 5.277627944892141e-06, "epoch": 2.3824179007205184, "total_flos": 2273228401166192640, "step": 592200 }, { "loss": 3.0375, "learning_rate": 5.27681681949621e-06, "epoch": 2.382820200264713, "total_flos": 2273626696533012480, "step": 592300 }, { "loss": 3.0725, "learning_rate": 5.27600569410028e-06, "epoch": 2.3832224998089075, "total_flos": 2274013535550320640, "step": 592400 }, { "loss": 2.995, "learning_rate": 5.275194568704349e-06, "epoch": 2.383624799353102, "total_flos": 2274416653525094400, "step": 592500 }, { "loss": 3.0375, "learning_rate": 5.274383443308418e-06, "epoch": 2.384027098897297, "total_flos": 2274807199789486080, "step": 592600 }, { "loss": 3.0125, "learning_rate": 5.2735723179124875e-06, "epoch": 2.3844293984414917, "total_flos": 2275192243606917120, "step": 592700 }, { "loss": 3.0375, "learning_rate": 5.2727611925165575e-06, "epoch": 2.3848316979856863, "total_flos": 2275586019106590720, "step": 592800 }, { "loss": 3.06, "learning_rate": 5.271950067120627e-06, "epoch": 2.385233997529881, "total_flos": 2275965321471160320, "step": 592900 }, { "loss": 3.0575, "learning_rate": 5.271138941724696e-06, "epoch": 2.3856362970740754, "total_flos": 2276359574982635520, "step": 593000 }, { "loss": 2.96, "learning_rate": 5.270327816328765e-06, "epoch": 2.38603859661827, "total_flos": 2276744533820190720, "step": 593100 }, { "loss": 3.055, "learning_rate": 5.269516690932835e-06, "epoch": 2.3864408961624646, "total_flos": 2277135956439552000, "step": 593200 }, { "loss": 3.0225, "learning_rate": 5.268705565536904e-06, "epoch": 2.386843195706659, "total_flos": 2277533237359104000, "step": 593300 }, { "loss": 3.015, "learning_rate": 5.2678944401409735e-06, "epoch": 2.3872454952508537, "total_flos": 2277911833328455680, "step": 593400 }, { "loss": 3.1025, "learning_rate": 5.267083314745044e-06, "epoch": 2.3876477947950487, "total_flos": 2278296484113960960, "step": 593500 }, { "loss": 3.0375, "learning_rate": 5.266272189349113e-06, "epoch": 2.388050094339243, "total_flos": 2278674081569771520, "step": 593600 }, { "loss": 3.025, "learning_rate": 5.265461063953182e-06, "epoch": 2.388452393883438, "total_flos": 2279041932896071680, "step": 593700 }, { "loss": 2.96, "learning_rate": 5.264649938557251e-06, "epoch": 2.3888546934276325, "total_flos": 2279444859666124800, "step": 593800 }, { "loss": 3.06, "learning_rate": 5.263838813161321e-06, "epoch": 2.389256992971827, "total_flos": 2279806024138444800, "step": 593900 }, { "loss": 3.06, "learning_rate": 5.26302768776539e-06, "epoch": 2.3896592925160216, "total_flos": 2280205923500421120, "step": 594000 }, { "loss": 3.0375, "learning_rate": 5.26221656236946e-06, "epoch": 2.390061592060216, "total_flos": 2280589219919155200, "step": 594100 }, { "loss": 3.0225, "learning_rate": 5.261405436973529e-06, "epoch": 2.3904638916044108, "total_flos": 2280987334703738880, "step": 594200 }, { "loss": 3.07, "learning_rate": 5.260594311577599e-06, "epoch": 2.3908661911486053, "total_flos": 2281376797474713600, "step": 594300 }, { "loss": 3.0175, "learning_rate": 5.259783186181668e-06, "epoch": 2.3912684906928, "total_flos": 2281744091120578560, "step": 594400 }, { "loss": 3.0425, "learning_rate": 5.258972060785737e-06, "epoch": 2.3916707902369945, "total_flos": 2282119903998996480, "step": 594500 }, { "loss": 3.075, "learning_rate": 5.2581609353898064e-06, "epoch": 2.3920730897811895, "total_flos": 2282499466614435840, "step": 594600 }, { "loss": 3.0625, "learning_rate": 5.2573498099938765e-06, "epoch": 2.392475389325384, "total_flos": 2282881413977640960, "step": 594700 }, { "loss": 3.055, "learning_rate": 5.256538684597946e-06, "epoch": 2.3928776888695786, "total_flos": 2283244171822632960, "step": 594800 }, { "loss": 3.035, "learning_rate": 5.255727559202015e-06, "epoch": 2.393279988413773, "total_flos": 2283619442954342400, "step": 594900 }, { "loss": 3.0225, "learning_rate": 5.254916433806084e-06, "epoch": 2.393682287957968, "total_flos": 2284008661408174080, "step": 595000 }, { "loss": 3.01, "learning_rate": 5.254105308410155e-06, "epoch": 2.3940845875021624, "total_flos": 2284392149031628800, "step": 595100 }, { "loss": 3.0475, "learning_rate": 5.253294183014223e-06, "epoch": 2.394486887046357, "total_flos": 2284776539566264320, "step": 595200 }, { "loss": 3.055, "learning_rate": 5.2524830576182925e-06, "epoch": 2.3948891865905515, "total_flos": 2285168179946557440, "step": 595300 }, { "loss": 3.0375, "learning_rate": 5.251671932222362e-06, "epoch": 2.395291486134746, "total_flos": 2285553738954485760, "step": 595400 }, { "loss": 3.145, "learning_rate": 5.250860806826433e-06, "epoch": 2.3956937856789406, "total_flos": 2285928022195138560, "step": 595500 }, { "loss": 3.0625, "learning_rate": 5.250049681430501e-06, "epoch": 2.396096085223135, "total_flos": 2286310288232878080, "step": 595600 }, { "loss": 3.0525, "learning_rate": 5.24923855603457e-06, "epoch": 2.3964983847673302, "total_flos": 2286688443369123840, "step": 595700 }, { "loss": 3.0525, "learning_rate": 5.248427430638639e-06, "epoch": 2.396900684311525, "total_flos": 2287078368218173440, "step": 595800 }, { "loss": 3.025, "learning_rate": 5.24761630524271e-06, "epoch": 2.3973029838557194, "total_flos": 2287470821218529280, "step": 595900 }, { "loss": 3.06, "learning_rate": 5.2468051798467786e-06, "epoch": 2.397705283399914, "total_flos": 2287857856751800320, "step": 596000 }, { "loss": 3.0925, "learning_rate": 5.245994054450848e-06, "epoch": 2.3981075829441085, "total_flos": 2288223716362260480, "step": 596100 }, { "loss": 3.0175, "learning_rate": 5.245182929054917e-06, "epoch": 2.398509882488303, "total_flos": 2288605132601241600, "step": 596200 }, { "loss": 3.06, "learning_rate": 5.244371803658988e-06, "epoch": 2.3989121820324977, "total_flos": 2288998600048865280, "step": 596300 }, { "loss": 3.08, "learning_rate": 5.243560678263056e-06, "epoch": 2.3993144815766922, "total_flos": 2289383670422507520, "step": 596400 }, { "loss": 3.03, "learning_rate": 5.242749552867125e-06, "epoch": 2.399716781120887, "total_flos": 2289758617568440320, "step": 596500 }, { "loss": 2.9925, "learning_rate": 5.241938427471195e-06, "epoch": 2.400119080665082, "total_flos": 2290135195265740800, "step": 596600 }, { "loss": 3.0525, "learning_rate": 5.2411273020752655e-06, "epoch": 2.4005213802092764, "total_flos": 2290527271167897600, "step": 596700 }, { "loss": 3.0675, "learning_rate": 5.240316176679334e-06, "epoch": 2.400923679753471, "total_flos": 2290899807009853440, "step": 596800 }, { "loss": 3.04, "learning_rate": 5.239505051283403e-06, "epoch": 2.4013259792976656, "total_flos": 2291289099821076480, "step": 596900 }, { "loss": 3.105, "learning_rate": 5.238693925887472e-06, "epoch": 2.40172827884186, "total_flos": 2291669613148876800, "step": 597000 }, { "loss": 3.11, "learning_rate": 5.237882800491543e-06, "epoch": 2.4021305783860547, "total_flos": 2292071647630233600, "step": 597100 }, { "loss": 3.0425, "learning_rate": 5.2370716750956115e-06, "epoch": 2.4025328779302493, "total_flos": 2292462167338414080, "step": 597200 }, { "loss": 3.085, "learning_rate": 5.236260549699681e-06, "epoch": 2.402935177474444, "total_flos": 2292837518138757120, "step": 597300 }, { "loss": 3.0275, "learning_rate": 5.23544942430375e-06, "epoch": 2.4033374770186384, "total_flos": 2293223055901716480, "step": 597400 }, { "loss": 3.0475, "learning_rate": 5.234638298907821e-06, "epoch": 2.403739776562833, "total_flos": 2293599298990755840, "step": 597500 }, { "loss": 3.05, "learning_rate": 5.23382717351189e-06, "epoch": 2.4041420761070276, "total_flos": 2293974543566254080, "step": 597600 }, { "loss": 3.0575, "learning_rate": 5.233016048115958e-06, "epoch": 2.4045443756512226, "total_flos": 2294359077504430080, "step": 597700 }, { "loss": 3.0275, "learning_rate": 5.2322049227200275e-06, "epoch": 2.404946675195417, "total_flos": 2294742214585896960, "step": 597800 }, { "loss": 3.0675, "learning_rate": 5.231393797324098e-06, "epoch": 2.4053489747396117, "total_flos": 2295142469801103360, "step": 597900 }, { "loss": 3.0325, "learning_rate": 5.2305826719281676e-06, "epoch": 2.4057512742838063, "total_flos": 2295506013709946880, "step": 598000 }, { "loss": 3.0775, "learning_rate": 5.229771546532236e-06, "epoch": 2.406153573828001, "total_flos": 2295895805777940480, "step": 598100 }, { "loss": 3.025, "learning_rate": 5.228960421136307e-06, "epoch": 2.4065558733721955, "total_flos": 2296288619942768640, "step": 598200 }, { "loss": 3.0525, "learning_rate": 5.228149295740376e-06, "epoch": 2.40695817291639, "total_flos": 2296661086738575360, "step": 598300 }, { "loss": 3.02, "learning_rate": 5.227338170344445e-06, "epoch": 2.4073604724605846, "total_flos": 2297055180912783360, "step": 598400 }, { "loss": 3.05, "learning_rate": 5.2265270449485135e-06, "epoch": 2.407762772004779, "total_flos": 2297429272948715520, "step": 598500 }, { "loss": 3.065, "learning_rate": 5.2257159195525844e-06, "epoch": 2.4081650715489737, "total_flos": 2297809834077696000, "step": 598600 }, { "loss": 3.0125, "learning_rate": 5.224904794156654e-06, "epoch": 2.4085673710931683, "total_flos": 2298196179149475840, "step": 598700 }, { "loss": 3.0125, "learning_rate": 5.224093668760723e-06, "epoch": 2.4089696706373633, "total_flos": 2298574254617088000, "step": 598800 }, { "loss": 3.065, "learning_rate": 5.223282543364791e-06, "epoch": 2.409371970181558, "total_flos": 2298946801081528320, "step": 598900 }, { "loss": 3.045, "learning_rate": 5.222471417968862e-06, "epoch": 2.4097742697257525, "total_flos": 2299338616732815360, "step": 599000 }, { "loss": 3.0675, "learning_rate": 5.221660292572931e-06, "epoch": 2.410176569269947, "total_flos": 2299716819670241280, "step": 599100 }, { "loss": 2.9875, "learning_rate": 5.2208491671770005e-06, "epoch": 2.4105788688141416, "total_flos": 2300105108656680960, "step": 599200 }, { "loss": 3.035, "learning_rate": 5.220038041781069e-06, "epoch": 2.410981168358336, "total_flos": 2300476306065592320, "step": 599300 }, { "loss": 3.075, "learning_rate": 5.21922691638514e-06, "epoch": 2.4113834679025308, "total_flos": 2300843047342264320, "step": 599400 }, { "loss": 2.9975, "learning_rate": 5.218415790989209e-06, "epoch": 2.4117857674467253, "total_flos": 2301234156598333440, "step": 599500 }, { "loss": 3.0525, "learning_rate": 5.217604665593278e-06, "epoch": 2.41218806699092, "total_flos": 2301625021537259520, "step": 599600 }, { "loss": 3.035, "learning_rate": 5.2167935401973464e-06, "epoch": 2.412590366535115, "total_flos": 2302005826983383040, "step": 599700 }, { "loss": 2.98, "learning_rate": 5.215982414801417e-06, "epoch": 2.4129926660793095, "total_flos": 2302393202436157440, "step": 599800 }, { "loss": 3.05, "learning_rate": 5.2151712894054865e-06, "epoch": 2.413394965623504, "total_flos": 2302776174869114880, "step": 599900 }, { "loss": 3.025, "learning_rate": 5.214360164009556e-06, "epoch": 2.4137972651676987, "total_flos": 2303144509518458880, "step": 600000 }, { "loss": 3.075, "learning_rate": 5.213549038613625e-06, "epoch": 2.4141995647118932, "total_flos": 2303540133330432000, "step": 600100 }, { "loss": 3.025, "learning_rate": 5.212737913217695e-06, "epoch": 2.414601864256088, "total_flos": 2303928177999728640, "step": 600200 }, { "loss": 3.0525, "learning_rate": 5.211926787821764e-06, "epoch": 2.4150041638002824, "total_flos": 2304331163193446400, "step": 600300 }, { "loss": 3.035, "learning_rate": 5.211115662425833e-06, "epoch": 2.415406463344477, "total_flos": 2304723440922808320, "step": 600400 }, { "loss": 3.05, "learning_rate": 5.2103045370299025e-06, "epoch": 2.4158087628886715, "total_flos": 2305101989090979840, "step": 600500 }, { "loss": 3.0325, "learning_rate": 5.209493411633973e-06, "epoch": 2.416211062432866, "total_flos": 2305497602280468480, "step": 600600 }, { "loss": 3.0275, "learning_rate": 5.208682286238042e-06, "epoch": 2.4166133619770607, "total_flos": 2305885747863367680, "step": 600700 }, { "loss": 3.055, "learning_rate": 5.207871160842111e-06, "epoch": 2.4170156615212557, "total_flos": 2306273064892477440, "step": 600800 }, { "loss": 3.015, "learning_rate": 5.20706003544618e-06, "epoch": 2.4174179610654503, "total_flos": 2306662543597178880, "step": 600900 }, { "loss": 3.0725, "learning_rate": 5.20624891005025e-06, "epoch": 2.417820260609645, "total_flos": 2307039455902740480, "step": 601000 }, { "loss": 3.0775, "learning_rate": 5.205437784654319e-06, "epoch": 2.4182225601538394, "total_flos": 2307424595322531840, "step": 601100 }, { "loss": 3.04, "learning_rate": 5.204626659258389e-06, "epoch": 2.418624859698034, "total_flos": 2307798443041320960, "step": 601200 }, { "loss": 3.005, "learning_rate": 5.203815533862458e-06, "epoch": 2.4190271592422286, "total_flos": 2308178398688686080, "step": 601300 }, { "loss": 3.0575, "learning_rate": 5.203004408466528e-06, "epoch": 2.419429458786423, "total_flos": 2308566565516554240, "step": 601400 }, { "loss": 3.0525, "learning_rate": 5.202193283070597e-06, "epoch": 2.4198317583306177, "total_flos": 2308969513531576320, "step": 601500 }, { "loss": 3.01, "learning_rate": 5.201382157674666e-06, "epoch": 2.4202340578748123, "total_flos": 2309362747284541440, "step": 601600 }, { "loss": 3.055, "learning_rate": 5.2005710322787354e-06, "epoch": 2.4206363574190073, "total_flos": 2309760389368565760, "step": 601700 }, { "loss": 3.0975, "learning_rate": 5.1997599068828055e-06, "epoch": 2.4210386569632014, "total_flos": 2310135283402076160, "step": 601800 }, { "loss": 3.0125, "learning_rate": 5.198948781486875e-06, "epoch": 2.4214409565073964, "total_flos": 2310516795243417600, "step": 601900 }, { "loss": 3.0375, "learning_rate": 5.198137656090944e-06, "epoch": 2.421843256051591, "total_flos": 2310908924257996800, "step": 602000 }, { "loss": 3.05, "learning_rate": 5.197326530695013e-06, "epoch": 2.4222455555957856, "total_flos": 2311274433326469120, "step": 602100 }, { "loss": 3.035, "learning_rate": 5.196515405299083e-06, "epoch": 2.42264785513998, "total_flos": 2311671480551362560, "step": 602200 }, { "loss": 3.0425, "learning_rate": 5.195704279903152e-06, "epoch": 2.4230501546841747, "total_flos": 2312052880856616960, "step": 602300 }, { "loss": 3.0775, "learning_rate": 5.1948931545072215e-06, "epoch": 2.4234524542283693, "total_flos": 2312441270756659200, "step": 602400 }, { "loss": 3.03, "learning_rate": 5.194082029111291e-06, "epoch": 2.423854753772564, "total_flos": 2312820833372098560, "step": 602500 }, { "loss": 3.0425, "learning_rate": 5.193270903715361e-06, "epoch": 2.4242570533167584, "total_flos": 2313205808143380480, "step": 602600 }, { "loss": 3.0875, "learning_rate": 5.19245977831943e-06, "epoch": 2.424659352860953, "total_flos": 2313593135794974720, "step": 602700 }, { "loss": 3.0675, "learning_rate": 5.191648652923499e-06, "epoch": 2.425061652405148, "total_flos": 2313979831408742400, "step": 602800 }, { "loss": 3.0775, "learning_rate": 5.190837527527569e-06, "epoch": 2.4254639519493426, "total_flos": 2314357120812503040, "step": 602900 }, { "loss": 3.065, "learning_rate": 5.190026402131638e-06, "epoch": 2.425866251493537, "total_flos": 2314746599517204480, "step": 603000 }, { "loss": 3.025, "learning_rate": 5.1892152767357076e-06, "epoch": 2.4262685510377318, "total_flos": 2315136720882216960, "step": 603100 }, { "loss": 3.0325, "learning_rate": 5.188404151339777e-06, "epoch": 2.4266708505819263, "total_flos": 2315507121604792320, "step": 603200 }, { "loss": 3.08, "learning_rate": 5.187593025943847e-06, "epoch": 2.427073150126121, "total_flos": 2315894077469429760, "step": 603300 }, { "loss": 3.02, "learning_rate": 5.186781900547916e-06, "epoch": 2.4274754496703155, "total_flos": 2316287693631836160, "step": 603400 }, { "loss": 3.0775, "learning_rate": 5.185970775151985e-06, "epoch": 2.42787774921451, "total_flos": 2316656378823168000, "step": 603500 }, { "loss": 3.04, "learning_rate": 5.185159649756054e-06, "epoch": 2.4282800487587046, "total_flos": 2317026715810836480, "step": 603600 }, { "loss": 3.09, "learning_rate": 5.1843485243601244e-06, "epoch": 2.428682348302899, "total_flos": 2317418690799390720, "step": 603700 }, { "loss": 3.045, "learning_rate": 5.183537398964194e-06, "epoch": 2.4290846478470938, "total_flos": 2317788448861655040, "step": 603800 }, { "loss": 3.01, "learning_rate": 5.182726273568263e-06, "epoch": 2.429486947391289, "total_flos": 2318158530909696000, "step": 603900 }, { "loss": 3.025, "learning_rate": 5.181915148172332e-06, "epoch": 2.4298892469354834, "total_flos": 2318538258173644800, "step": 604000 }, { "loss": 3.0875, "learning_rate": 5.181104022776402e-06, "epoch": 2.430291546479678, "total_flos": 2318929691415490560, "step": 604100 }, { "loss": 3.0225, "learning_rate": 5.180292897380471e-06, "epoch": 2.4306938460238725, "total_flos": 2319317513012613120, "step": 604200 }, { "loss": 3.0675, "learning_rate": 5.1794817719845405e-06, "epoch": 2.431096145568067, "total_flos": 2319710220952596480, "step": 604300 }, { "loss": 3.0275, "learning_rate": 5.17867064658861e-06, "epoch": 2.4314984451122617, "total_flos": 2320099699657297920, "step": 604400 }, { "loss": 3.075, "learning_rate": 5.17785952119268e-06, "epoch": 2.4319007446564562, "total_flos": 2320488264828334080, "step": 604500 }, { "loss": 3.055, "learning_rate": 5.177048395796749e-06, "epoch": 2.432303044200651, "total_flos": 2320862585247682560, "step": 604600 }, { "loss": 3.0475, "learning_rate": 5.176237270400818e-06, "epoch": 2.4327053437448454, "total_flos": 2321249376463810560, "step": 604700 }, { "loss": 3.05, "learning_rate": 5.175426145004887e-06, "epoch": 2.4331076432890404, "total_flos": 2321636332328448000, "step": 604800 }, { "loss": 3.0025, "learning_rate": 5.174615019608957e-06, "epoch": 2.433509942833235, "total_flos": 2322003461325803520, "step": 604900 }, { "loss": 3.035, "learning_rate": 5.1738038942130265e-06, "epoch": 2.4339122423774295, "total_flos": 2322385934501990400, "step": 605000 }, { "loss": 3.03, "learning_rate": 5.172992768817096e-06, "epoch": 2.434314541921624, "total_flos": 2322768604194140160, "step": 605100 }, { "loss": 2.9975, "learning_rate": 5.172181643421165e-06, "epoch": 2.4347168414658187, "total_flos": 2323170888303882240, "step": 605200 }, { "loss": 3.02, "learning_rate": 5.171370518025235e-06, "epoch": 2.4351191410100133, "total_flos": 2323549638299258880, "step": 605300 }, { "loss": 3.0425, "learning_rate": 5.170559392629304e-06, "epoch": 2.435521440554208, "total_flos": 2323939223228805120, "step": 605400 }, { "loss": 3.0575, "learning_rate": 5.169748267233373e-06, "epoch": 2.4359237400984024, "total_flos": 2324328292967854080, "step": 605500 }, { "loss": 3.0725, "learning_rate": 5.1689371418374425e-06, "epoch": 2.436326039642597, "total_flos": 2324707154499317760, "step": 605600 }, { "loss": 3.07, "learning_rate": 5.168126016441513e-06, "epoch": 2.4367283391867915, "total_flos": 2325099320692592640, "step": 605700 }, { "loss": 3.0175, "learning_rate": 5.167314891045582e-06, "epoch": 2.437130638730986, "total_flos": 2325482861428469760, "step": 605800 }, { "loss": 3.035, "learning_rate": 5.166503765649651e-06, "epoch": 2.437532938275181, "total_flos": 2325870236881244160, "step": 605900 }, { "loss": 3.06, "learning_rate": 5.16569264025372e-06, "epoch": 2.4379352378193757, "total_flos": 2326255886180290560, "step": 606000 }, { "loss": 3.02, "learning_rate": 5.16488151485779e-06, "epoch": 2.4383375373635703, "total_flos": 2326642045358592000, "step": 606100 }, { "loss": 3.0275, "learning_rate": 5.164070389461859e-06, "epoch": 2.438739836907765, "total_flos": 2327027259135774720, "step": 606200 }, { "loss": 3.0375, "learning_rate": 5.163259264065929e-06, "epoch": 2.4391421364519594, "total_flos": 2327411846186373120, "step": 606300 }, { "loss": 3.065, "learning_rate": 5.162448138669998e-06, "epoch": 2.439544435996154, "total_flos": 2327810959484497920, "step": 606400 }, { "loss": 3.0225, "learning_rate": 5.161637013274068e-06, "epoch": 2.4399467355403486, "total_flos": 2328202307746467840, "step": 606500 }, { "loss": 3.0225, "learning_rate": 5.160825887878137e-06, "epoch": 2.440349035084543, "total_flos": 2328575518116188160, "step": 606600 }, { "loss": 3.0325, "learning_rate": 5.160014762482206e-06, "epoch": 2.4407513346287377, "total_flos": 2328955319737528320, "step": 606700 }, { "loss": 3.07, "learning_rate": 5.1592036370862754e-06, "epoch": 2.4411536341729323, "total_flos": 2329339805874524160, "step": 606800 }, { "loss": 3.0275, "learning_rate": 5.1583925116903455e-06, "epoch": 2.441555933717127, "total_flos": 2329725980986552320, "step": 606900 }, { "loss": 3.01, "learning_rate": 5.157581386294415e-06, "epoch": 2.441958233261322, "total_flos": 2330106627095408640, "step": 607000 }, { "loss": 3.0675, "learning_rate": 5.156770260898484e-06, "epoch": 2.4423605328055165, "total_flos": 2330486625232711680, "step": 607100 }, { "loss": 3.0125, "learning_rate": 5.155959135502553e-06, "epoch": 2.442762832349711, "total_flos": 2330853090324787200, "step": 607200 }, { "loss": 3.0125, "learning_rate": 5.155148010106623e-06, "epoch": 2.4431651318939056, "total_flos": 2331229763624448000, "step": 607300 }, { "loss": 3.015, "learning_rate": 5.154336884710692e-06, "epoch": 2.4435674314381, "total_flos": 2331621133131386880, "step": 607400 }, { "loss": 3.06, "learning_rate": 5.1535257593147615e-06, "epoch": 2.4439697309822948, "total_flos": 2332007701275340800, "step": 607500 }, { "loss": 3.0625, "learning_rate": 5.1527146339188315e-06, "epoch": 2.4443720305264893, "total_flos": 2332394200373145600, "step": 607600 }, { "loss": 3.04, "learning_rate": 5.151903508522901e-06, "epoch": 2.444774330070684, "total_flos": 2332793435829841920, "step": 607700 }, { "loss": 3.0075, "learning_rate": 5.15109238312697e-06, "epoch": 2.4451766296148785, "total_flos": 2333183737777090560, "step": 607800 }, { "loss": 3.0275, "learning_rate": 5.150281257731039e-06, "epoch": 2.4455789291590735, "total_flos": 2333574777987010560, "step": 607900 }, { "loss": 3.0575, "learning_rate": 5.149470132335109e-06, "epoch": 2.445981228703268, "total_flos": 2333962966059847680, "step": 608000 }, { "loss": 3.0075, "learning_rate": 5.148659006939178e-06, "epoch": 2.4463835282474626, "total_flos": 2334336383568015360, "step": 608100 }, { "loss": 3.025, "learning_rate": 5.1478478815432476e-06, "epoch": 2.446785827791657, "total_flos": 2334745126148321280, "step": 608200 }, { "loss": 3.09, "learning_rate": 5.147036756147317e-06, "epoch": 2.447188127335852, "total_flos": 2335132448488673280, "step": 608300 }, { "loss": 3.0275, "learning_rate": 5.146225630751387e-06, "epoch": 2.4475904268800464, "total_flos": 2335499131341680640, "step": 608400 }, { "loss": 3.04, "learning_rate": 5.145414505355456e-06, "epoch": 2.447992726424241, "total_flos": 2335904942116270080, "step": 608500 }, { "loss": 3.0575, "learning_rate": 5.144603379959525e-06, "epoch": 2.4483950259684355, "total_flos": 2336286831055810560, "step": 608600 }, { "loss": 3.0675, "learning_rate": 5.143792254563594e-06, "epoch": 2.44879732551263, "total_flos": 2336669033358643200, "step": 608700 }, { "loss": 3.05, "learning_rate": 5.1429811291676644e-06, "epoch": 2.4491996250568246, "total_flos": 2337047066336317440, "step": 608800 }, { "loss": 3.04, "learning_rate": 5.142170003771734e-06, "epoch": 2.449601924601019, "total_flos": 2337420505089454080, "step": 608900 }, { "loss": 3.0375, "learning_rate": 5.141358878375803e-06, "epoch": 2.4500042241452142, "total_flos": 2337813436101611520, "step": 609000 }, { "loss": 3.0525, "learning_rate": 5.140547752979872e-06, "epoch": 2.450406523689409, "total_flos": 2338198692368732160, "step": 609100 }, { "loss": 3.0225, "learning_rate": 5.139736627583942e-06, "epoch": 2.4508088232336034, "total_flos": 2338585680100823040, "step": 609200 }, { "loss": 3.0925, "learning_rate": 5.138925502188011e-06, "epoch": 2.451211122777798, "total_flos": 2338960829073960960, "step": 609300 }, { "loss": 3.035, "learning_rate": 5.1381143767920805e-06, "epoch": 2.4516134223219925, "total_flos": 2339328292679577600, "step": 609400 }, { "loss": 3.0425, "learning_rate": 5.13730325139615e-06, "epoch": 2.452015721866187, "total_flos": 2339714722731233280, "step": 609500 }, { "loss": 3.0575, "learning_rate": 5.13649212600022e-06, "epoch": 2.4524180214103817, "total_flos": 2340113559844761600, "step": 609600 }, { "loss": 3.035, "learning_rate": 5.135681000604289e-06, "epoch": 2.4528203209545762, "total_flos": 2340495491274240000, "step": 609700 }, { "loss": 3.0175, "learning_rate": 5.134869875208358e-06, "epoch": 2.453222620498771, "total_flos": 2340873731390361600, "step": 609800 }, { "loss": 3.0075, "learning_rate": 5.134058749812427e-06, "epoch": 2.453624920042966, "total_flos": 2341259943681085440, "step": 609900 }, { "loss": 2.9975, "learning_rate": 5.133247624416497e-06, "epoch": 2.45402721958716, "total_flos": 2341643548151869440, "step": 610000 }, { "loss": 3.07, "learning_rate": 5.1324364990205665e-06, "epoch": 2.454429519131355, "total_flos": 2342022558398115840, "step": 610100 }, { "loss": 3.025, "learning_rate": 5.131625373624636e-06, "epoch": 2.4548318186755496, "total_flos": 2342395997151252480, "step": 610200 }, { "loss": 3.06, "learning_rate": 5.130814248228705e-06, "epoch": 2.455234118219744, "total_flos": 2342801383026462720, "step": 610300 }, { "loss": 3.0275, "learning_rate": 5.130003122832775e-06, "epoch": 2.4556364177639387, "total_flos": 2343176356728606720, "step": 610400 }, { "loss": 3.0175, "learning_rate": 5.129191997436844e-06, "epoch": 2.4560387173081333, "total_flos": 2343558144754544640, "step": 610500 }, { "loss": 3.06, "learning_rate": 5.128380872040913e-06, "epoch": 2.456441016852328, "total_flos": 2343952578848256000, "step": 610600 }, { "loss": 3.0375, "learning_rate": 5.1275697466449825e-06, "epoch": 2.4568433163965224, "total_flos": 2344338010386370560, "step": 610700 }, { "loss": 3.0425, "learning_rate": 5.126758621249053e-06, "epoch": 2.457245615940717, "total_flos": 2344711804992737280, "step": 610800 }, { "loss": 3.0, "learning_rate": 5.125947495853122e-06, "epoch": 2.4576479154849116, "total_flos": 2345088265842708480, "step": 610900 }, { "loss": 3.0175, "learning_rate": 5.125136370457191e-06, "epoch": 2.4580502150291066, "total_flos": 2345455145211678720, "step": 611000 }, { "loss": 3.0225, "learning_rate": 5.12432524506126e-06, "epoch": 2.458452514573301, "total_flos": 2345854874613903360, "step": 611100 }, { "loss": 3.03, "learning_rate": 5.12351411966533e-06, "epoch": 2.4588548141174957, "total_flos": 2346232079037788160, "step": 611200 }, { "loss": 2.9875, "learning_rate": 5.122702994269399e-06, "epoch": 2.4592571136616903, "total_flos": 2346622970532925440, "step": 611300 }, { "loss": 2.995, "learning_rate": 5.121891868873469e-06, "epoch": 2.459659413205885, "total_flos": 2346977044496855040, "step": 611400 }, { "loss": 3.015, "learning_rate": 5.121080743477538e-06, "epoch": 2.4600617127500795, "total_flos": 2347365705270251520, "step": 611500 }, { "loss": 2.98, "learning_rate": 5.120269618081608e-06, "epoch": 2.460464012294274, "total_flos": 2347735176525434880, "step": 611600 }, { "loss": 3.0075, "learning_rate": 5.119458492685677e-06, "epoch": 2.4608663118384686, "total_flos": 2348118037422305280, "step": 611700 }, { "loss": 3.0775, "learning_rate": 5.118647367289746e-06, "epoch": 2.461268611382663, "total_flos": 2348490960984944640, "step": 611800 }, { "loss": 3.055, "learning_rate": 5.1178362418938154e-06, "epoch": 2.4616709109268577, "total_flos": 2348874416740945920, "step": 611900 }, { "loss": 3.0275, "learning_rate": 5.1170251164978855e-06, "epoch": 2.4620732104710523, "total_flos": 2349256927095828480, "step": 612000 }, { "loss": 3.01, "learning_rate": 5.116213991101955e-06, "epoch": 2.4624755100152473, "total_flos": 2349660331877683200, "step": 612100 }, { "loss": 3.0, "learning_rate": 5.115402865706024e-06, "epoch": 2.462877809559442, "total_flos": 2350033388221378560, "step": 612200 }, { "loss": 3.05, "learning_rate": 5.114591740310094e-06, "epoch": 2.4632801091036365, "total_flos": 2350401244858920960, "step": 612300 }, { "loss": 2.9975, "learning_rate": 5.113780614914163e-06, "epoch": 2.463682408647831, "total_flos": 2350790532358901760, "step": 612400 }, { "loss": 3.0525, "learning_rate": 5.112969489518232e-06, "epoch": 2.4640847081920256, "total_flos": 2351188769302056960, "step": 612500 }, { "loss": 3.035, "learning_rate": 5.1121583641223015e-06, "epoch": 2.46448700773622, "total_flos": 2351556907435438080, "step": 612600 }, { "loss": 2.9975, "learning_rate": 5.1113472387263715e-06, "epoch": 2.4648893072804148, "total_flos": 2351931243788513280, "step": 612700 }, { "loss": 3.08, "learning_rate": 5.110536113330441e-06, "epoch": 2.4652916068246093, "total_flos": 2352334053711237120, "step": 612800 }, { "loss": 3.055, "learning_rate": 5.10972498793451e-06, "epoch": 2.465693906368804, "total_flos": 2352716017008168960, "step": 612900 }, { "loss": 3.075, "learning_rate": 5.108913862538579e-06, "epoch": 2.466096205912999, "total_flos": 2353100503145164800, "step": 613000 }, { "loss": 3.0325, "learning_rate": 5.108102737142649e-06, "epoch": 2.466498505457193, "total_flos": 2353487682081976320, "step": 613100 }, { "loss": 3.075, "learning_rate": 5.107291611746718e-06, "epoch": 2.466900805001388, "total_flos": 2353900944529428480, "step": 613200 }, { "loss": 3.0575, "learning_rate": 5.1064804863507876e-06, "epoch": 2.4673031045455827, "total_flos": 2354298289183887360, "step": 613300 }, { "loss": 3.045, "learning_rate": 5.105669360954857e-06, "epoch": 2.4677054040897772, "total_flos": 2354669491904040960, "step": 613400 }, { "loss": 3.0375, "learning_rate": 5.104858235558927e-06, "epoch": 2.468107703633972, "total_flos": 2355057355991101440, "step": 613500 }, { "loss": 3.0125, "learning_rate": 5.104047110162996e-06, "epoch": 2.4685100031781664, "total_flos": 2355433530033991680, "step": 613600 }, { "loss": 3.0975, "learning_rate": 5.103235984767065e-06, "epoch": 2.468912302722361, "total_flos": 2355823762935091200, "step": 613700 }, { "loss": 3.0375, "learning_rate": 5.102424859371134e-06, "epoch": 2.4693146022665555, "total_flos": 2356218542259548160, "step": 613800 }, { "loss": 3.0525, "learning_rate": 5.1016137339752044e-06, "epoch": 2.46971690181075, "total_flos": 2356590026475540480, "step": 613900 }, { "loss": 3.0625, "learning_rate": 5.100802608579274e-06, "epoch": 2.4701192013549447, "total_flos": 2356973211358187520, "step": 614000 }, { "loss": 3.02, "learning_rate": 5.099991483183343e-06, "epoch": 2.4705215008991397, "total_flos": 2357362817532702720, "step": 614100 }, { "loss": 3.0275, "learning_rate": 5.099180357787412e-06, "epoch": 2.4709238004433343, "total_flos": 2357744005388267520, "step": 614200 }, { "loss": 3.0525, "learning_rate": 5.098369232391482e-06, "epoch": 2.471326099987529, "total_flos": 2358118575436001280, "step": 614300 }, { "loss": 3.025, "learning_rate": 5.097558106995551e-06, "epoch": 2.4717283995317234, "total_flos": 2358496640281128960, "step": 614400 }, { "loss": 3.0, "learning_rate": 5.0967469815996205e-06, "epoch": 2.472130699075918, "total_flos": 2358888174436577280, "step": 614500 }, { "loss": 2.9825, "learning_rate": 5.09593585620369e-06, "epoch": 2.4725329986201126, "total_flos": 2359280106935193600, "step": 614600 }, { "loss": 2.9825, "learning_rate": 5.09512473080776e-06, "epoch": 2.472935298164307, "total_flos": 2359654900055101440, "step": 614700 }, { "loss": 3.01, "learning_rate": 5.094313605411829e-06, "epoch": 2.4733375977085017, "total_flos": 2360017031173509120, "step": 614800 }, { "loss": 3.0525, "learning_rate": 5.093502480015898e-06, "epoch": 2.4737398972526963, "total_flos": 2360416032935546880, "step": 614900 }, { "loss": 3.045, "learning_rate": 5.092691354619967e-06, "epoch": 2.474142196796891, "total_flos": 2360812718995968000, "step": 615000 }, { "loss": 3.01, "learning_rate": 5.091880229224037e-06, "epoch": 2.4745444963410854, "total_flos": 2361215311157760000, "step": 615100 }, { "loss": 3.05, "learning_rate": 5.0910691038281065e-06, "epoch": 2.4749467958852804, "total_flos": 2361599521110159360, "step": 615200 }, { "loss": 3.05, "learning_rate": 5.090257978432176e-06, "epoch": 2.475349095429475, "total_flos": 2361990449783992320, "step": 615300 }, { "loss": 3.0475, "learning_rate": 5.089446853036245e-06, "epoch": 2.4757513949736696, "total_flos": 2362369507831418880, "step": 615400 }, { "loss": 3.0175, "learning_rate": 5.088635727640315e-06, "epoch": 2.476153694517864, "total_flos": 2362764536784261120, "step": 615500 }, { "loss": 3.0275, "learning_rate": 5.087824602244384e-06, "epoch": 2.4765559940620587, "total_flos": 2363141964280320000, "step": 615600 }, { "loss": 3.0225, "learning_rate": 5.087013476848453e-06, "epoch": 2.4769582936062533, "total_flos": 2363522339515822080, "step": 615700 }, { "loss": 3.07, "learning_rate": 5.0862023514525225e-06, "epoch": 2.477360593150448, "total_flos": 2363900871750266880, "step": 615800 }, { "loss": 3.0825, "learning_rate": 5.085391226056593e-06, "epoch": 2.4777628926946424, "total_flos": 2364287583297761280, "step": 615900 }, { "loss": 3.02, "learning_rate": 5.084580100660662e-06, "epoch": 2.478165192238837, "total_flos": 2364668760530841600, "step": 616000 }, { "loss": 3.02, "learning_rate": 5.083768975264731e-06, "epoch": 2.478567491783032, "total_flos": 2365044159132364800, "step": 616100 }, { "loss": 3.06, "learning_rate": 5.0829578498688e-06, "epoch": 2.4789697913272266, "total_flos": 2365437472553963520, "step": 616200 }, { "loss": 3.0225, "learning_rate": 5.08214672447287e-06, "epoch": 2.479372090871421, "total_flos": 2365819919173939200, "step": 616300 }, { "loss": 3.03, "learning_rate": 5.081335599076939e-06, "epoch": 2.4797743904156158, "total_flos": 2366208659615969280, "step": 616400 }, { "loss": 3.0625, "learning_rate": 5.080524473681009e-06, "epoch": 2.4801766899598103, "total_flos": 2366589852782776320, "step": 616500 }, { "loss": 2.9975, "learning_rate": 5.079713348285078e-06, "epoch": 2.480578989504005, "total_flos": 2366979921035366400, "step": 616600 }, { "loss": 3.065, "learning_rate": 5.078902222889148e-06, "epoch": 2.4809812890481995, "total_flos": 2367369644057210880, "step": 616700 }, { "loss": 3.0725, "learning_rate": 5.078091097493217e-06, "epoch": 2.481383588592394, "total_flos": 2367762516645703680, "step": 616800 }, { "loss": 3.0325, "learning_rate": 5.077279972097286e-06, "epoch": 2.4817858881365886, "total_flos": 2368136146603560960, "step": 616900 }, { "loss": 2.975, "learning_rate": 5.076468846701357e-06, "epoch": 2.482188187680783, "total_flos": 2368526629133045760, "step": 617000 }, { "loss": 3.0425, "learning_rate": 5.0756577213054255e-06, "epoch": 2.4825904872249778, "total_flos": 2368917472827002880, "step": 617100 }, { "loss": 3.015, "learning_rate": 5.074846595909495e-06, "epoch": 2.482992786769173, "total_flos": 2369300960450457600, "step": 617200 }, { "loss": 3.02, "learning_rate": 5.074035470513564e-06, "epoch": 2.4833950863133674, "total_flos": 2369675052486389760, "step": 617300 }, { "loss": 3.0025, "learning_rate": 5.073224345117635e-06, "epoch": 2.483797385857562, "total_flos": 2370052400313815040, "step": 617400 }, { "loss": 3.065, "learning_rate": 5.072413219721703e-06, "epoch": 2.4841996854017565, "total_flos": 2370439048126402560, "step": 617500 }, { "loss": 3.0375, "learning_rate": 5.071602094325772e-06, "epoch": 2.484601984945951, "total_flos": 2370843961301053440, "step": 617600 }, { "loss": 3.0375, "learning_rate": 5.0707909689298415e-06, "epoch": 2.4850042844901457, "total_flos": 2371234799683768320, "step": 617700 }, { "loss": 3.01, "learning_rate": 5.069979843533912e-06, "epoch": 2.4854065840343402, "total_flos": 2371617883652812800, "step": 617800 }, { "loss": 3.0625, "learning_rate": 5.069168718137982e-06, "epoch": 2.485808883578535, "total_flos": 2371993760266137600, "step": 617900 }, { "loss": 3.0, "learning_rate": 5.06835759274205e-06, "epoch": 2.4862111831227294, "total_flos": 2372377709967667200, "step": 618000 }, { "loss": 3.0175, "learning_rate": 5.067546467346119e-06, "epoch": 2.486613482666924, "total_flos": 2372753533468569600, "step": 618100 }, { "loss": 3.02, "learning_rate": 5.06673534195019e-06, "epoch": 2.4870157822111185, "total_flos": 2373131338062827520, "step": 618200 }, { "loss": 3.055, "learning_rate": 5.065924216554259e-06, "epoch": 2.4874180817553135, "total_flos": 2373519356175912960, "step": 618300 }, { "loss": 2.975, "learning_rate": 5.0651130911583276e-06, "epoch": 2.487820381299508, "total_flos": 2373904968296263680, "step": 618400 }, { "loss": 3.015, "learning_rate": 5.064301965762397e-06, "epoch": 2.4882226808437027, "total_flos": 2374279437430394880, "step": 618500 }, { "loss": 3.03, "learning_rate": 5.063490840366468e-06, "epoch": 2.4886249803878973, "total_flos": 2374634201855815680, "step": 618600 }, { "loss": 3.06, "learning_rate": 5.062679714970537e-06, "epoch": 2.489027279932092, "total_flos": 2375010227183923200, "step": 618700 }, { "loss": 2.9875, "learning_rate": 5.061868589574605e-06, "epoch": 2.4894295794762864, "total_flos": 2375384754741719040, "step": 618800 }, { "loss": 2.98, "learning_rate": 5.061057464178674e-06, "epoch": 2.489831879020481, "total_flos": 2375753811720007680, "step": 618900 }, { "loss": 3.025, "learning_rate": 5.060246338782745e-06, "epoch": 2.4902341785646755, "total_flos": 2376127739107430400, "step": 619000 }, { "loss": 3.0575, "learning_rate": 5.0594352133868145e-06, "epoch": 2.49063647810887, "total_flos": 2376498585974353920, "step": 619100 }, { "loss": 3.015, "learning_rate": 5.058624087990883e-06, "epoch": 2.491038777653065, "total_flos": 2376877458128302080, "step": 619200 }, { "loss": 2.98, "learning_rate": 5.057812962594952e-06, "epoch": 2.4914410771972597, "total_flos": 2377263022447472640, "step": 619300 }, { "loss": 3.045, "learning_rate": 5.057001837199023e-06, "epoch": 2.4918433767414543, "total_flos": 2377645299107696640, "step": 619400 }, { "loss": 3.04, "learning_rate": 5.056190711803092e-06, "epoch": 2.492245676285649, "total_flos": 2378035632922398720, "step": 619500 }, { "loss": 3.04, "learning_rate": 5.0553795864071605e-06, "epoch": 2.4926479758298434, "total_flos": 2378424091868590080, "step": 619600 }, { "loss": 3.035, "learning_rate": 5.05456846101123e-06, "epoch": 2.493050275374038, "total_flos": 2378815939387330560, "step": 619700 }, { "loss": 3.075, "learning_rate": 5.0537573356153005e-06, "epoch": 2.4934525749182326, "total_flos": 2379186090481520640, "step": 619800 }, { "loss": 3.06, "learning_rate": 5.05294621021937e-06, "epoch": 2.493854874462427, "total_flos": 2379581778028400640, "step": 619900 }, { "loss": 3.0375, "learning_rate": 5.052135084823438e-06, "epoch": 2.4942571740066217, "total_flos": 2379965998603284480, "step": 620000 }, { "loss": 3.06, "learning_rate": 5.051323959427507e-06, "epoch": 2.4946594735508163, "total_flos": 2380352800441896960, "step": 620100 }, { "loss": 3.0325, "learning_rate": 5.050512834031578e-06, "epoch": 2.495061773095011, "total_flos": 2380760873805680640, "step": 620200 }, { "loss": 3.035, "learning_rate": 5.049701708635647e-06, "epoch": 2.495464072639206, "total_flos": 2381156003672125440, "step": 620300 }, { "loss": 3.0275, "learning_rate": 5.0488905832397166e-06, "epoch": 2.4958663721834005, "total_flos": 2381546013501050880, "step": 620400 }, { "loss": 2.95, "learning_rate": 5.048079457843785e-06, "epoch": 2.496268671727595, "total_flos": 2381933771363266560, "step": 620500 }, { "loss": 3.045, "learning_rate": 5.047268332447856e-06, "epoch": 2.4966709712717896, "total_flos": 2382297405563228160, "step": 620600 }, { "loss": 3.07, "learning_rate": 5.046457207051925e-06, "epoch": 2.497073270815984, "total_flos": 2382667041466920960, "step": 620700 }, { "loss": 2.985, "learning_rate": 5.045646081655994e-06, "epoch": 2.4974755703601788, "total_flos": 2383053211267706880, "step": 620800 }, { "loss": 2.9875, "learning_rate": 5.0448349562600625e-06, "epoch": 2.4978778699043733, "total_flos": 2383448017148375040, "step": 620900 }, { "loss": 3.01, "learning_rate": 5.0440238308641334e-06, "epoch": 2.498280169448568, "total_flos": 2383826979593441280, "step": 621000 }, { "loss": 3.0625, "learning_rate": 5.043212705468203e-06, "epoch": 2.4986824689927625, "total_flos": 2384223623163924480, "step": 621100 }, { "loss": 3.04, "learning_rate": 5.042401580072272e-06, "epoch": 2.4990847685369575, "total_flos": 2384612113977569280, "step": 621200 }, { "loss": 2.97, "learning_rate": 5.04159045467634e-06, "epoch": 2.4994870680811516, "total_flos": 2384977277815296000, "step": 621300 }, { "loss": 3.08, "learning_rate": 5.040779329280411e-06, "epoch": 2.4998893676253466, "total_flos": 2385364116832604160, "step": 621400 }, { "loss": 2.9975, "learning_rate": 5.03996820388448e-06, "epoch": 2.500291667169541, "total_flos": 2385758205695569920, "step": 621500 }, { "loss": 3.065, "learning_rate": 5.0391570784885495e-06, "epoch": 2.500693966713736, "total_flos": 2386118839043665920, "step": 621600 }, { "loss": 2.9675, "learning_rate": 5.0383459530926195e-06, "epoch": 2.5010962662579304, "total_flos": 2386507621975633920, "step": 621700 }, { "loss": 3.0575, "learning_rate": 5.037534827696689e-06, "epoch": 2.501498565802125, "total_flos": 2386896627979776000, "step": 621800 }, { "loss": 3.0525, "learning_rate": 5.036723702300758e-06, "epoch": 2.5019008653463195, "total_flos": 2387280737018572800, "step": 621900 }, { "loss": 3.0375, "learning_rate": 5.035912576904827e-06, "epoch": 2.502303164890514, "total_flos": 2387665090374512640, "step": 622000 }, { "loss": 3.0225, "learning_rate": 5.035101451508897e-06, "epoch": 2.5027054644347086, "total_flos": 2388047531683246080, "step": 622100 }, { "loss": 3.0475, "learning_rate": 5.034290326112966e-06, "epoch": 2.503107763978903, "total_flos": 2388425601839616000, "step": 622200 }, { "loss": 3.0, "learning_rate": 5.0334792007170355e-06, "epoch": 2.5035100635230982, "total_flos": 2388822930560348160, "step": 622300 }, { "loss": 3.05, "learning_rate": 5.032668075321105e-06, "epoch": 2.5039123630672924, "total_flos": 2389208632971816960, "step": 622400 }, { "loss": 3.015, "learning_rate": 5.031856949925175e-06, "epoch": 2.5043146626114874, "total_flos": 2389589905807257600, "step": 622500 }, { "loss": 3.0, "learning_rate": 5.031045824529244e-06, "epoch": 2.504716962155682, "total_flos": 2389979166751027200, "step": 622600 }, { "loss": 3.075, "learning_rate": 5.030234699133313e-06, "epoch": 2.5051192616998765, "total_flos": 2390350789059317760, "step": 622700 }, { "loss": 3.0575, "learning_rate": 5.029423573737382e-06, "epoch": 2.505521561244071, "total_flos": 2390746906816819200, "step": 622800 }, { "loss": 2.975, "learning_rate": 5.028612448341452e-06, "epoch": 2.5059238607882657, "total_flos": 2391117758994984960, "step": 622900 }, { "loss": 2.995, "learning_rate": 5.027801322945522e-06, "epoch": 2.5063261603324603, "total_flos": 2391490130188431360, "step": 623000 }, { "loss": 3.0875, "learning_rate": 5.026990197549591e-06, "epoch": 2.506728459876655, "total_flos": 2391853998083051520, "step": 623100 }, { "loss": 3.01, "learning_rate": 5.02617907215366e-06, "epoch": 2.50713075942085, "total_flos": 2392252617435648000, "step": 623200 }, { "loss": 3.035, "learning_rate": 5.02536794675773e-06, "epoch": 2.507533058965044, "total_flos": 2392648570544640000, "step": 623300 }, { "loss": 3.08, "learning_rate": 5.024556821361799e-06, "epoch": 2.507935358509239, "total_flos": 2393034851881512960, "step": 623400 }, { "loss": 3.0525, "learning_rate": 5.023745695965868e-06, "epoch": 2.5083376580534336, "total_flos": 2393433370320506880, "step": 623500 }, { "loss": 3.03, "learning_rate": 5.022934570569938e-06, "epoch": 2.508739957597628, "total_flos": 2393816783586570240, "step": 623600 }, { "loss": 3.0075, "learning_rate": 5.022123445174008e-06, "epoch": 2.5091422571418227, "total_flos": 2394212704828108800, "step": 623700 }, { "loss": 3.0475, "learning_rate": 5.021312319778077e-06, "epoch": 2.5095445566860173, "total_flos": 2394608116190392320, "step": 623800 }, { "loss": 3.0575, "learning_rate": 5.020501194382146e-06, "epoch": 2.509946856230212, "total_flos": 2394986287260364800, "step": 623900 }, { "loss": 2.995, "learning_rate": 5.019690068986215e-06, "epoch": 2.5103491557744064, "total_flos": 2395368861350154240, "step": 624000 }, { "loss": 3.02, "learning_rate": 5.018878943590285e-06, "epoch": 2.510751455318601, "total_flos": 2395749783643607040, "step": 624100 }, { "loss": 3.04, "learning_rate": 5.0180678181943545e-06, "epoch": 2.5111537548627956, "total_flos": 2396132575494328320, "step": 624200 }, { "loss": 3.0175, "learning_rate": 5.017256692798424e-06, "epoch": 2.5115560544069906, "total_flos": 2396510550048337920, "step": 624300 }, { "loss": 3.02, "learning_rate": 5.016445567402493e-06, "epoch": 2.5119583539511847, "total_flos": 2396901468099686400, "step": 624400 }, { "loss": 2.98, "learning_rate": 5.015634442006563e-06, "epoch": 2.5123606534953797, "total_flos": 2397268767056793600, "step": 624500 }, { "loss": 3.005, "learning_rate": 5.014823316610632e-06, "epoch": 2.5127629530395743, "total_flos": 2397651813847142400, "step": 624600 }, { "loss": 3.005, "learning_rate": 5.014012191214701e-06, "epoch": 2.513165252583769, "total_flos": 2398019829821952000, "step": 624700 }, { "loss": 2.995, "learning_rate": 5.0132010658187705e-06, "epoch": 2.5135675521279635, "total_flos": 2398404273469009920, "step": 624800 }, { "loss": 3.0575, "learning_rate": 5.0123899404228405e-06, "epoch": 2.513969851672158, "total_flos": 2398793056400977920, "step": 624900 }, { "loss": 3.0575, "learning_rate": 5.01157881502691e-06, "epoch": 2.5143721512163526, "total_flos": 2399186513226117120, "step": 625000 }, { "loss": 3.0275, "learning_rate": 5.010767689630979e-06, "epoch": 2.514774450760547, "total_flos": 2399579088385044480, "step": 625100 }, { "loss": 3.0475, "learning_rate": 5.009956564235048e-06, "epoch": 2.5151767503047417, "total_flos": 2399973251605401600, "step": 625200 }, { "loss": 3.0225, "learning_rate": 5.009145438839118e-06, "epoch": 2.5155790498489363, "total_flos": 2400348219996303360, "step": 625300 }, { "loss": 2.985, "learning_rate": 5.008334313443187e-06, "epoch": 2.5159813493931313, "total_flos": 2400736684253736960, "step": 625400 }, { "loss": 3.0, "learning_rate": 5.0075231880472566e-06, "epoch": 2.516383648937326, "total_flos": 2401123794144399360, "step": 625500 }, { "loss": 3.0175, "learning_rate": 5.006712062651326e-06, "epoch": 2.5167859484815205, "total_flos": 2401500159392010240, "step": 625600 }, { "loss": 3.0325, "learning_rate": 5.005900937255396e-06, "epoch": 2.517188248025715, "total_flos": 2401897302219264000, "step": 625700 }, { "loss": 3.0575, "learning_rate": 5.005089811859465e-06, "epoch": 2.5175905475699096, "total_flos": 2402275096191037440, "step": 625800 }, { "loss": 2.955, "learning_rate": 5.004278686463534e-06, "epoch": 2.517992847114104, "total_flos": 2402662657537290240, "step": 625900 }, { "loss": 3.0175, "learning_rate": 5.003467561067603e-06, "epoch": 2.5183951466582988, "total_flos": 2403037679040614400, "step": 626000 }, { "loss": 3.04, "learning_rate": 5.0026564356716734e-06, "epoch": 2.5187974462024934, "total_flos": 2403424565859102720, "step": 626100 }, { "loss": 3.04, "learning_rate": 5.001845310275743e-06, "epoch": 2.519199745746688, "total_flos": 2403806916876718080, "step": 626200 }, { "loss": 3.01, "learning_rate": 5.001034184879812e-06, "epoch": 2.519602045290883, "total_flos": 2404204410245959680, "step": 626300 }, { "loss": 3.0175, "learning_rate": 5.000223059483882e-06, "epoch": 2.520004344835077, "total_flos": 2404592624875008000, "step": 626400 }, { "loss": 3.055, "learning_rate": 4.999411934087951e-06, "epoch": 2.520406644379272, "total_flos": 2404981540588032000, "step": 626500 }, { "loss": 2.98, "learning_rate": 4.99860080869202e-06, "epoch": 2.5208089439234667, "total_flos": 2405369771150807040, "step": 626600 }, { "loss": 3.0425, "learning_rate": 4.9977896832960895e-06, "epoch": 2.5212112434676612, "total_flos": 2405761050366627840, "step": 626700 }, { "loss": 3.0125, "learning_rate": 4.996978557900159e-06, "epoch": 2.521613543011856, "total_flos": 2406127520769945600, "step": 626800 }, { "loss": 2.975, "learning_rate": 4.996167432504229e-06, "epoch": 2.5220158425560504, "total_flos": 2406504927021035520, "step": 626900 }, { "loss": 2.99, "learning_rate": 4.995356307108298e-06, "epoch": 2.522418142100245, "total_flos": 2406899196466237440, "step": 627000 }, { "loss": 3.0425, "learning_rate": 4.994545181712367e-06, "epoch": 2.5228204416444395, "total_flos": 2407274122367201280, "step": 627100 }, { "loss": 2.975, "learning_rate": 4.993734056316436e-06, "epoch": 2.523222741188634, "total_flos": 2407673554339860480, "step": 627200 }, { "loss": 2.9675, "learning_rate": 4.992922930920506e-06, "epoch": 2.5236250407328287, "total_flos": 2408063930644500480, "step": 627300 }, { "loss": 3.125, "learning_rate": 4.9921118055245755e-06, "epoch": 2.5240273402770237, "total_flos": 2408442192005591040, "step": 627400 }, { "loss": 3.075, "learning_rate": 4.991300680128645e-06, "epoch": 2.524429639821218, "total_flos": 2408822721267118080, "step": 627500 }, { "loss": 3.0, "learning_rate": 4.990489554732714e-06, "epoch": 2.524831939365413, "total_flos": 2409199049336033280, "step": 627600 }, { "loss": 3.0, "learning_rate": 4.989678429336784e-06, "epoch": 2.5252342389096074, "total_flos": 2409575504874762240, "step": 627700 }, { "loss": 2.9975, "learning_rate": 4.988867303940853e-06, "epoch": 2.525636538453802, "total_flos": 2409965535948656640, "step": 627800 }, { "loss": 3.01, "learning_rate": 4.988056178544922e-06, "epoch": 2.5260388379979966, "total_flos": 2410368483963678720, "step": 627900 }, { "loss": 3.045, "learning_rate": 4.9872450531489915e-06, "epoch": 2.526441137542191, "total_flos": 2410770853053296640, "step": 628000 }, { "loss": 3.015, "learning_rate": 4.986433927753062e-06, "epoch": 2.5268434370863857, "total_flos": 2411161303715328000, "step": 628100 }, { "loss": 3.085, "learning_rate": 4.985622802357131e-06, "epoch": 2.5272457366305803, "total_flos": 2411535002719334400, "step": 628200 }, { "loss": 3.025, "learning_rate": 4.9848116769612e-06, "epoch": 2.527648036174775, "total_flos": 2411918007019745280, "step": 628300 }, { "loss": 2.99, "learning_rate": 4.984000551565269e-06, "epoch": 2.5280503357189694, "total_flos": 2412307602571776000, "step": 628400 }, { "loss": 2.9775, "learning_rate": 4.983189426169339e-06, "epoch": 2.5284526352631644, "total_flos": 2412683256112926720, "step": 628500 }, { "loss": 2.96, "learning_rate": 4.982378300773408e-06, "epoch": 2.528854934807359, "total_flos": 2413048818293821440, "step": 628600 }, { "loss": 3.0575, "learning_rate": 4.981567175377478e-06, "epoch": 2.5292572343515536, "total_flos": 2413417567220060160, "step": 628700 }, { "loss": 3.01, "learning_rate": 4.980756049981547e-06, "epoch": 2.529659533895748, "total_flos": 2413800205044756480, "step": 628800 }, { "loss": 3.1, "learning_rate": 4.979944924585617e-06, "epoch": 2.5300618334399427, "total_flos": 2414188005396910080, "step": 628900 }, { "loss": 2.995, "learning_rate": 4.979133799189686e-06, "epoch": 2.5304641329841373, "total_flos": 2414567732660858880, "step": 629000 }, { "loss": 2.9725, "learning_rate": 4.978322673793756e-06, "epoch": 2.530866432528332, "total_flos": 2414939158453186560, "step": 629100 }, { "loss": 3.015, "learning_rate": 4.9775115483978244e-06, "epoch": 2.5312687320725265, "total_flos": 2415322518606827520, "step": 629200 }, { "loss": 2.99, "learning_rate": 4.9767004230018945e-06, "epoch": 2.531671031616721, "total_flos": 2415708693718855680, "step": 629300 }, { "loss": 2.9975, "learning_rate": 4.975889297605964e-06, "epoch": 2.532073331160916, "total_flos": 2416093063008522240, "step": 629400 }, { "loss": 3.0475, "learning_rate": 4.975078172210034e-06, "epoch": 2.53247563070511, "total_flos": 2416470532994519040, "step": 629500 }, { "loss": 3.03, "learning_rate": 4.974267046814102e-06, "epoch": 2.532877930249305, "total_flos": 2416854445517352960, "step": 629600 }, { "loss": 2.9925, "learning_rate": 4.973455921418172e-06, "epoch": 2.5332802297934998, "total_flos": 2417224405406822400, "step": 629700 }, { "loss": 3.04, "learning_rate": 4.972644796022241e-06, "epoch": 2.5336825293376943, "total_flos": 2417611053219409920, "step": 629800 }, { "loss": 3.0275, "learning_rate": 4.971833670626311e-06, "epoch": 2.534084828881889, "total_flos": 2417990658324787200, "step": 629900 }, { "loss": 3.0125, "learning_rate": 4.97102254523038e-06, "epoch": 2.5344871284260835, "total_flos": 2418374347775447040, "step": 630000 }, { "loss": 3.075, "learning_rate": 4.97021141983445e-06, "epoch": 2.534889427970278, "total_flos": 2418755211645235200, "step": 630100 }, { "loss": 2.97, "learning_rate": 4.969400294438519e-06, "epoch": 2.5352917275144726, "total_flos": 2419134460897382400, "step": 630200 }, { "loss": 3.035, "learning_rate": 4.968589169042589e-06, "epoch": 2.535694027058667, "total_flos": 2419507384460021760, "step": 630300 }, { "loss": 2.9725, "learning_rate": 4.967778043646657e-06, "epoch": 2.5360963266028618, "total_flos": 2419898684920811520, "step": 630400 }, { "loss": 3.0475, "learning_rate": 4.966966918250727e-06, "epoch": 2.536498626147057, "total_flos": 2420269489297797120, "step": 630500 }, { "loss": 3.0275, "learning_rate": 4.9661557928547966e-06, "epoch": 2.536900925691251, "total_flos": 2420654416267898880, "step": 630600 }, { "loss": 3.035, "learning_rate": 4.965344667458867e-06, "epoch": 2.537303225235446, "total_flos": 2421021316881838080, "step": 630700 }, { "loss": 2.9775, "learning_rate": 4.964533542062935e-06, "epoch": 2.5377055247796405, "total_flos": 2421390347303915520, "step": 630800 }, { "loss": 2.9825, "learning_rate": 4.963722416667005e-06, "epoch": 2.538107824323835, "total_flos": 2421761273839472640, "step": 630900 }, { "loss": 3.02, "learning_rate": 4.962911291271074e-06, "epoch": 2.5385101238680297, "total_flos": 2422128301923225600, "step": 631000 }, { "loss": 3.01, "learning_rate": 4.962100165875144e-06, "epoch": 2.5389124234122242, "total_flos": 2422508252259348480, "step": 631100 }, { "loss": 3.0425, "learning_rate": 4.9612890404792134e-06, "epoch": 2.539314722956419, "total_flos": 2422896493444608000, "step": 631200 }, { "loss": 2.9875, "learning_rate": 4.960477915083283e-06, "epoch": 2.5397170225006134, "total_flos": 2423287315893596160, "step": 631300 }, { "loss": 3.0275, "learning_rate": 4.959666789687353e-06, "epoch": 2.5401193220448084, "total_flos": 2423675020643389440, "step": 631400 }, { "loss": 3.0725, "learning_rate": 4.958855664291422e-06, "epoch": 2.5405216215890025, "total_flos": 2424082807200092160, "step": 631500 }, { "loss": 3.0225, "learning_rate": 4.958044538895491e-06, "epoch": 2.5409239211331975, "total_flos": 2424467797905100800, "step": 631600 }, { "loss": 3.0525, "learning_rate": 4.95723341349956e-06, "epoch": 2.541326220677392, "total_flos": 2424846277027123200, "step": 631700 }, { "loss": 2.9775, "learning_rate": 4.95642228810363e-06, "epoch": 2.5417285202215867, "total_flos": 2425241699011891200, "step": 631800 }, { "loss": 2.95, "learning_rate": 4.9556111627076995e-06, "epoch": 2.5421308197657813, "total_flos": 2425619222110310400, "step": 631900 }, { "loss": 3.0675, "learning_rate": 4.954800037311769e-06, "epoch": 2.542533119309976, "total_flos": 2425998450117488640, "step": 632000 }, { "loss": 2.98, "learning_rate": 4.953988911915838e-06, "epoch": 2.5429354188541704, "total_flos": 2426386516031754240, "step": 632100 }, { "loss": 3.0525, "learning_rate": 4.953177786519908e-06, "epoch": 2.543337718398365, "total_flos": 2426775543280865280, "step": 632200 }, { "loss": 3.0025, "learning_rate": 4.952366661123977e-06, "epoch": 2.5437400179425596, "total_flos": 2427149900878909440, "step": 632300 }, { "loss": 3.005, "learning_rate": 4.951555535728046e-06, "epoch": 2.544142317486754, "total_flos": 2427527179660185600, "step": 632400 }, { "loss": 3.025, "learning_rate": 4.9507444103321155e-06, "epoch": 2.544544617030949, "total_flos": 2427919760130355200, "step": 632500 }, { "loss": 3.0675, "learning_rate": 4.9499332849361856e-06, "epoch": 2.5449469165751433, "total_flos": 2428301388819025920, "step": 632600 }, { "loss": 3.0525, "learning_rate": 4.949122159540255e-06, "epoch": 2.5453492161193383, "total_flos": 2428673212954521600, "step": 632700 }, { "loss": 3.035, "learning_rate": 4.948311034144324e-06, "epoch": 2.545751515663533, "total_flos": 2429040368508088320, "step": 632800 }, { "loss": 3.045, "learning_rate": 4.947499908748393e-06, "epoch": 2.5461538152077274, "total_flos": 2429426734824837120, "step": 632900 }, { "loss": 2.99, "learning_rate": 4.946688783352463e-06, "epoch": 2.546556114751922, "total_flos": 2429799923949588480, "step": 633000 }, { "loss": 3.015, "learning_rate": 4.945877657956532e-06, "epoch": 2.5469584142961166, "total_flos": 2430177468292976640, "step": 633100 }, { "loss": 3.0575, "learning_rate": 4.945066532560602e-06, "epoch": 2.547360713840311, "total_flos": 2430568609416499200, "step": 633200 }, { "loss": 3.03, "learning_rate": 4.944255407164671e-06, "epoch": 2.5477630133845057, "total_flos": 2430956924959150080, "step": 633300 }, { "loss": 2.9975, "learning_rate": 4.943444281768741e-06, "epoch": 2.5481653129287003, "total_flos": 2431343827711365120, "step": 633400 }, { "loss": 3.0575, "learning_rate": 4.94263315637281e-06, "epoch": 2.548567612472895, "total_flos": 2431740938671165440, "step": 633500 }, { "loss": 2.995, "learning_rate": 4.941822030976879e-06, "epoch": 2.54896991201709, "total_flos": 2432135654260715520, "step": 633600 }, { "loss": 3.015, "learning_rate": 4.941010905580948e-06, "epoch": 2.5493722115612845, "total_flos": 2432536408732692480, "step": 633700 }, { "loss": 3.01, "learning_rate": 4.9401997801850185e-06, "epoch": 2.549774511105479, "total_flos": 2432922275792670720, "step": 633800 }, { "loss": 3.0375, "learning_rate": 4.939388654789088e-06, "epoch": 2.5501768106496736, "total_flos": 2433306597281157120, "step": 633900 }, { "loss": 2.9975, "learning_rate": 4.938577529393157e-06, "epoch": 2.550579110193868, "total_flos": 2433684651503800320, "step": 634000 }, { "loss": 3.0, "learning_rate": 4.937766403997226e-06, "epoch": 2.5509814097380628, "total_flos": 2434056257878364160, "step": 634100 }, { "loss": 2.9675, "learning_rate": 4.936955278601296e-06, "epoch": 2.5513837092822573, "total_flos": 2434441062689894400, "step": 634200 }, { "loss": 2.98, "learning_rate": 4.936144153205365e-06, "epoch": 2.551786008826452, "total_flos": 2434826111818567680, "step": 634300 }, { "loss": 3.0275, "learning_rate": 4.9353330278094345e-06, "epoch": 2.5521883083706465, "total_flos": 2435213630674882560, "step": 634400 }, { "loss": 3.005, "learning_rate": 4.934521902413504e-06, "epoch": 2.5525906079148415, "total_flos": 2435591371534233600, "step": 634500 }, { "loss": 3.0175, "learning_rate": 4.933710777017574e-06, "epoch": 2.5529929074590356, "total_flos": 2435976298504335360, "step": 634600 }, { "loss": 3.0, "learning_rate": 4.932899651621643e-06, "epoch": 2.5533952070032306, "total_flos": 2436356992414371840, "step": 634700 }, { "loss": 3.01, "learning_rate": 4.932088526225712e-06, "epoch": 2.553797506547425, "total_flos": 2436735386556518400, "step": 634800 }, { "loss": 3.02, "learning_rate": 4.931277400829781e-06, "epoch": 2.55419980609162, "total_flos": 2437121280172707840, "step": 634900 }, { "loss": 2.9875, "learning_rate": 4.930466275433851e-06, "epoch": 2.5546021056358144, "total_flos": 2437496365410938880, "step": 635000 }, { "loss": 3.0375, "learning_rate": 4.9296551500379205e-06, "epoch": 2.555004405180009, "total_flos": 2437901799087329280, "step": 635100 }, { "loss": 3.0475, "learning_rate": 4.92884402464199e-06, "epoch": 2.5554067047242035, "total_flos": 2438272454749532160, "step": 635200 }, { "loss": 3.045, "learning_rate": 4.928032899246059e-06, "epoch": 2.555809004268398, "total_flos": 2438654529582551040, "step": 635300 }, { "loss": 2.9525, "learning_rate": 4.927221773850129e-06, "epoch": 2.5562113038125927, "total_flos": 2439033422981468160, "step": 635400 }, { "loss": 2.9925, "learning_rate": 4.926410648454198e-06, "epoch": 2.5566136033567872, "total_flos": 2439428977747292160, "step": 635500 }, { "loss": 3.0525, "learning_rate": 4.925599523058267e-06, "epoch": 2.5570159029009822, "total_flos": 2439796940609679360, "step": 635600 }, { "loss": 3.045, "learning_rate": 4.9247883976623366e-06, "epoch": 2.5574182024451764, "total_flos": 2440190684241899520, "step": 635700 }, { "loss": 3.01, "learning_rate": 4.923977272266407e-06, "epoch": 2.5578205019893714, "total_flos": 2440569715733114880, "step": 635800 }, { "loss": 3.015, "learning_rate": 4.923166146870476e-06, "epoch": 2.558222801533566, "total_flos": 2440968287284531200, "step": 635900 }, { "loss": 2.9775, "learning_rate": 4.922355021474545e-06, "epoch": 2.5586251010777605, "total_flos": 2441362291167621120, "step": 636000 }, { "loss": 3.0225, "learning_rate": 4.921543896078615e-06, "epoch": 2.559027400621955, "total_flos": 2441727152264540160, "step": 636100 }, { "loss": 3.075, "learning_rate": 4.920732770682684e-06, "epoch": 2.5594297001661497, "total_flos": 2442124619077570560, "step": 636200 }, { "loss": 2.9425, "learning_rate": 4.9199216452867534e-06, "epoch": 2.5598319997103443, "total_flos": 2442497717911203840, "step": 636300 }, { "loss": 3.015, "learning_rate": 4.919110519890823e-06, "epoch": 2.560234299254539, "total_flos": 2442873382074839040, "step": 636400 }, { "loss": 3.0325, "learning_rate": 4.918299394494893e-06, "epoch": 2.5606365987987334, "total_flos": 2443268134843084800, "step": 636500 }, { "loss": 3.015, "learning_rate": 4.917488269098962e-06, "epoch": 2.561038898342928, "total_flos": 2443649312076165120, "step": 636600 }, { "loss": 3.0325, "learning_rate": 4.916677143703031e-06, "epoch": 2.561441197887123, "total_flos": 2444021523932344320, "step": 636700 }, { "loss": 3.01, "learning_rate": 4.9158660183071e-06, "epoch": 2.5618434974313176, "total_flos": 2444406716464558080, "step": 636800 }, { "loss": 2.9625, "learning_rate": 4.91505489291117e-06, "epoch": 2.562245796975512, "total_flos": 2444777462417879040, "step": 636900 }, { "loss": 3.025, "learning_rate": 4.9142437675152395e-06, "epoch": 2.5626480965197067, "total_flos": 2445178668345446400, "step": 637000 }, { "loss": 3.0, "learning_rate": 4.913432642119309e-06, "epoch": 2.5630503960639013, "total_flos": 2445567796508160000, "step": 637100 }, { "loss": 3.03, "learning_rate": 4.912621516723378e-06, "epoch": 2.563452695608096, "total_flos": 2445961805702492160, "step": 637200 }, { "loss": 2.995, "learning_rate": 4.911810391327448e-06, "epoch": 2.5638549951522904, "total_flos": 2446342733307187200, "step": 637300 }, { "loss": 3.0025, "learning_rate": 4.910999265931517e-06, "epoch": 2.564257294696485, "total_flos": 2446725365820641280, "step": 637400 }, { "loss": 2.995, "learning_rate": 4.910188140535586e-06, "epoch": 2.5646595942406796, "total_flos": 2447102984521420800, "step": 637500 }, { "loss": 3.0225, "learning_rate": 4.9093770151396555e-06, "epoch": 2.5650618937848746, "total_flos": 2447486063179223040, "step": 637600 }, { "loss": 3.025, "learning_rate": 4.9085658897437256e-06, "epoch": 2.5654641933290687, "total_flos": 2447876604132372480, "step": 637700 }, { "loss": 2.96, "learning_rate": 4.907754764347795e-06, "epoch": 2.5658664928732637, "total_flos": 2448255789649612800, "step": 637800 }, { "loss": 3.0225, "learning_rate": 4.906943638951864e-06, "epoch": 2.5662687924174583, "total_flos": 2448661393285754880, "step": 637900 }, { "loss": 2.9925, "learning_rate": 4.906132513555933e-06, "epoch": 2.566671091961653, "total_flos": 2449025457696337920, "step": 638000 }, { "loss": 2.985, "learning_rate": 4.905321388160003e-06, "epoch": 2.5670733915058475, "total_flos": 2449415849934704640, "step": 638100 }, { "loss": 3.0525, "learning_rate": 4.904510262764072e-06, "epoch": 2.567475691050042, "total_flos": 2449788826609766400, "step": 638200 }, { "loss": 3.045, "learning_rate": 4.903699137368142e-06, "epoch": 2.5678779905942366, "total_flos": 2450157623337185280, "step": 638300 }, { "loss": 2.9875, "learning_rate": 4.902888011972211e-06, "epoch": 2.568280290138431, "total_flos": 2450538986463744000, "step": 638400 }, { "loss": 3.01, "learning_rate": 4.902076886576281e-06, "epoch": 2.5686825896826257, "total_flos": 2450917258447319040, "step": 638500 }, { "loss": 2.9725, "learning_rate": 4.90126576118035e-06, "epoch": 2.5690848892268203, "total_flos": 2451303598207856640, "step": 638600 }, { "loss": 2.9775, "learning_rate": 4.900454635784419e-06, "epoch": 2.5694871887710153, "total_flos": 2451698765252997120, "step": 638700 }, { "loss": 3.02, "learning_rate": 4.899643510388488e-06, "epoch": 2.5698894883152095, "total_flos": 2452071980933959680, "step": 638800 }, { "loss": 3.05, "learning_rate": 4.8988323849925585e-06, "epoch": 2.5702917878594045, "total_flos": 2452456796367974400, "step": 638900 }, { "loss": 3.02, "learning_rate": 4.898021259596628e-06, "epoch": 2.570694087403599, "total_flos": 2452831817871298560, "step": 639000 }, { "loss": 3.005, "learning_rate": 4.897210134200697e-06, "epoch": 2.5710963869477936, "total_flos": 2453232174000107520, "step": 639100 }, { "loss": 3.0025, "learning_rate": 4.896399008804766e-06, "epoch": 2.571498686491988, "total_flos": 2453615396061450240, "step": 639200 }, { "loss": 2.9775, "learning_rate": 4.895587883408836e-06, "epoch": 2.571900986036183, "total_flos": 2453995596025958400, "step": 639300 }, { "loss": 3.035, "learning_rate": 4.894776758012905e-06, "epoch": 2.5723032855803774, "total_flos": 2454375833169162240, "step": 639400 }, { "loss": 3.0225, "learning_rate": 4.8939656326169745e-06, "epoch": 2.572705585124572, "total_flos": 2454759511997337600, "step": 639500 }, { "loss": 3.0025, "learning_rate": 4.893154507221044e-06, "epoch": 2.573107884668767, "total_flos": 2455156580467200000, "step": 639600 }, { "loss": 2.9925, "learning_rate": 4.892343381825114e-06, "epoch": 2.573510184212961, "total_flos": 2455545140326993920, "step": 639700 }, { "loss": 3.0125, "learning_rate": 4.891532256429183e-06, "epoch": 2.573912483757156, "total_flos": 2455922233214791680, "step": 639800 }, { "loss": 2.975, "learning_rate": 4.890721131033252e-06, "epoch": 2.5743147833013507, "total_flos": 2456310017633218560, "step": 639900 }, { "loss": 3.0525, "learning_rate": 4.889910005637321e-06, "epoch": 2.5747170828455452, "total_flos": 2456692469564436480, "step": 640000 }, { "loss": 3.0425, "learning_rate": 4.889098880241391e-06, "epoch": 2.57511938238974, "total_flos": 2457080572657397760, "step": 640100 }, { "loss": 3.02, "learning_rate": 4.8882877548454605e-06, "epoch": 2.5755216819339344, "total_flos": 2457442969337917440, "step": 640200 }, { "loss": 3.02, "learning_rate": 4.887476629449531e-06, "epoch": 2.575923981478129, "total_flos": 2457834572539514880, "step": 640300 }, { "loss": 3.0, "learning_rate": 4.886665504053599e-06, "epoch": 2.5763262810223235, "total_flos": 2458220328063406080, "step": 640400 }, { "loss": 3.005, "learning_rate": 4.885854378657669e-06, "epoch": 2.576728580566518, "total_flos": 2458604909802762240, "step": 640500 }, { "loss": 3.0, "learning_rate": 4.885043253261739e-06, "epoch": 2.5771308801107127, "total_flos": 2458976824229376000, "step": 640600 }, { "loss": 2.9425, "learning_rate": 4.884232127865808e-06, "epoch": 2.5775331796549077, "total_flos": 2459376096864768000, "step": 640700 }, { "loss": 3.055, "learning_rate": 4.883421002469877e-06, "epoch": 2.577935479199102, "total_flos": 2459756737662382080, "step": 640800 }, { "loss": 3.02, "learning_rate": 4.882609877073947e-06, "epoch": 2.578337778743297, "total_flos": 2460152058733547520, "step": 640900 }, { "loss": 2.9925, "learning_rate": 4.881798751678017e-06, "epoch": 2.5787400782874914, "total_flos": 2460549334341857280, "step": 641000 }, { "loss": 2.985, "learning_rate": 4.880987626282086e-06, "epoch": 2.579142377831686, "total_flos": 2460910907779829760, "step": 641100 }, { "loss": 3.0075, "learning_rate": 4.880176500886155e-06, "epoch": 2.5795446773758806, "total_flos": 2461284585538867200, "step": 641200 }, { "loss": 3.0175, "learning_rate": 4.879365375490224e-06, "epoch": 2.579946976920075, "total_flos": 2461666787841699840, "step": 641300 }, { "loss": 2.9925, "learning_rate": 4.878554250094294e-06, "epoch": 2.5803492764642697, "total_flos": 2462048740516147200, "step": 641400 }, { "loss": 2.9925, "learning_rate": 4.8777431246983635e-06, "epoch": 2.5807515760084643, "total_flos": 2462439270846812160, "step": 641500 }, { "loss": 3.02, "learning_rate": 4.876931999302433e-06, "epoch": 2.581153875552659, "total_flos": 2462834066104995840, "step": 641600 }, { "loss": 3.0025, "learning_rate": 4.876120873906502e-06, "epoch": 2.5815561750968534, "total_flos": 2463215110557020160, "step": 641700 }, { "loss": 3.0225, "learning_rate": 4.875309748510572e-06, "epoch": 2.5819584746410484, "total_flos": 2463587455194255360, "step": 641800 }, { "loss": 2.965, "learning_rate": 4.874498623114641e-06, "epoch": 2.582360774185243, "total_flos": 2463978814078709760, "step": 641900 }, { "loss": 3.035, "learning_rate": 4.87368749771871e-06, "epoch": 2.5827630737294376, "total_flos": 2464368701749063680, "step": 642000 }, { "loss": 3.0125, "learning_rate": 4.8728763723227795e-06, "epoch": 2.583165373273632, "total_flos": 2464756762352087040, "step": 642100 }, { "loss": 2.9675, "learning_rate": 4.8720652469268495e-06, "epoch": 2.5835676728178267, "total_flos": 2465138321994608640, "step": 642200 }, { "loss": 3.0075, "learning_rate": 4.871254121530919e-06, "epoch": 2.5839699723620213, "total_flos": 2465518298886942720, "step": 642300 }, { "loss": 2.985, "learning_rate": 4.870442996134988e-06, "epoch": 2.584372271906216, "total_flos": 2465888465914859520, "step": 642400 }, { "loss": 3.015, "learning_rate": 4.869631870739057e-06, "epoch": 2.5847745714504105, "total_flos": 2466300039387279360, "step": 642500 }, { "loss": 3.025, "learning_rate": 4.868820745343127e-06, "epoch": 2.585176870994605, "total_flos": 2466684652994088960, "step": 642600 }, { "loss": 3.04, "learning_rate": 4.868009619947196e-06, "epoch": 2.5855791705388, "total_flos": 2467068554894438400, "step": 642700 }, { "loss": 3.0475, "learning_rate": 4.8671984945512656e-06, "epoch": 2.585981470082994, "total_flos": 2467459733196656640, "step": 642800 }, { "loss": 3.005, "learning_rate": 4.866387369155335e-06, "epoch": 2.586383769627189, "total_flos": 2467848054050549760, "step": 642900 }, { "loss": 2.9825, "learning_rate": 4.865576243759405e-06, "epoch": 2.5867860691713838, "total_flos": 2468245165010350080, "step": 643000 }, { "loss": 2.9825, "learning_rate": 4.864765118363474e-06, "epoch": 2.5871883687155783, "total_flos": 2468628695123742720, "step": 643100 }, { "loss": 3.02, "learning_rate": 4.863953992967543e-06, "epoch": 2.587590668259773, "total_flos": 2469010881492848640, "step": 643200 }, { "loss": 2.9675, "learning_rate": 4.863142867571612e-06, "epoch": 2.5879929678039675, "total_flos": 2469388133717913600, "step": 643300 }, { "loss": 2.95, "learning_rate": 4.8623317421756824e-06, "epoch": 2.588395267348162, "total_flos": 2469775859712675840, "step": 643400 }, { "loss": 3.0075, "learning_rate": 4.861520616779752e-06, "epoch": 2.5887975668923566, "total_flos": 2470167887813652480, "step": 643500 }, { "loss": 2.9975, "learning_rate": 4.860709491383821e-06, "epoch": 2.589199866436551, "total_flos": 2470551354192138240, "step": 643600 }, { "loss": 3.0425, "learning_rate": 4.85989836598789e-06, "epoch": 2.5896021659807458, "total_flos": 2470921738980986880, "step": 643700 }, { "loss": 2.9775, "learning_rate": 4.85908724059196e-06, "epoch": 2.590004465524941, "total_flos": 2471305470921584640, "step": 643800 }, { "loss": 3.02, "learning_rate": 4.858276115196029e-06, "epoch": 2.590406765069135, "total_flos": 2471683859752488960, "step": 643900 }, { "loss": 2.9975, "learning_rate": 4.8574649898000985e-06, "epoch": 2.59080906461333, "total_flos": 2472071840686878720, "step": 644000 }, { "loss": 2.9825, "learning_rate": 4.856653864404168e-06, "epoch": 2.5912113641575245, "total_flos": 2472449916154490880, "step": 644100 }, { "loss": 2.99, "learning_rate": 4.855842739008238e-06, "epoch": 2.591613663701719, "total_flos": 2472840191545528320, "step": 644200 }, { "loss": 3.0275, "learning_rate": 4.855031613612307e-06, "epoch": 2.5920159632459137, "total_flos": 2473209859316674560, "step": 644300 }, { "loss": 3.025, "learning_rate": 4.854220488216376e-06, "epoch": 2.5924182627901082, "total_flos": 2473592215645532160, "step": 644400 }, { "loss": 2.985, "learning_rate": 4.853409362820445e-06, "epoch": 2.592820562334303, "total_flos": 2473988827348561920, "step": 644500 }, { "loss": 2.9975, "learning_rate": 4.852598237424515e-06, "epoch": 2.5932228618784974, "total_flos": 2474374466025123840, "step": 644600 }, { "loss": 3.04, "learning_rate": 4.8517871120285845e-06, "epoch": 2.593625161422692, "total_flos": 2474743538937139200, "step": 644700 }, { "loss": 2.9925, "learning_rate": 4.850975986632654e-06, "epoch": 2.5940274609668865, "total_flos": 2475127053116805120, "step": 644800 }, { "loss": 2.9925, "learning_rate": 4.850164861236723e-06, "epoch": 2.5944297605110815, "total_flos": 2475504300030627840, "step": 644900 }, { "loss": 2.9675, "learning_rate": 4.849353735840793e-06, "epoch": 2.594832060055276, "total_flos": 2475867912985620480, "step": 645000 }, { "loss": 2.935, "learning_rate": 4.848542610444862e-06, "epoch": 2.5952343595994707, "total_flos": 2476254874161500160, "step": 645100 }, { "loss": 2.98, "learning_rate": 4.847731485048931e-06, "epoch": 2.5956366591436653, "total_flos": 2476662071170314240, "step": 645200 }, { "loss": 2.95, "learning_rate": 4.846920359653001e-06, "epoch": 2.59603895868786, "total_flos": 2477055512061726720, "step": 645300 }, { "loss": 3.02, "learning_rate": 4.846109234257071e-06, "epoch": 2.5964412582320544, "total_flos": 2477437719675801600, "step": 645400 }, { "loss": 3.02, "learning_rate": 4.84529810886114e-06, "epoch": 2.596843557776249, "total_flos": 2477834804079390720, "step": 645500 }, { "loss": 2.9975, "learning_rate": 4.844486983465209e-06, "epoch": 2.5972458573204436, "total_flos": 2478224112824340480, "step": 645600 }, { "loss": 3.02, "learning_rate": 4.843675858069279e-06, "epoch": 2.597648156864638, "total_flos": 2478606591311769600, "step": 645700 }, { "loss": 3.0275, "learning_rate": 4.842864732673348e-06, "epoch": 2.598050456408833, "total_flos": 2478999060245852160, "step": 645800 }, { "loss": 3.0525, "learning_rate": 4.842053607277417e-06, "epoch": 2.5984527559530273, "total_flos": 2479364946412523520, "step": 645900 }, { "loss": 2.97, "learning_rate": 4.841242481881487e-06, "epoch": 2.5988550554972223, "total_flos": 2479758690044743680, "step": 646000 }, { "loss": 3.0475, "learning_rate": 4.840431356485557e-06, "epoch": 2.599257355041417, "total_flos": 2480148344020439040, "step": 646100 }, { "loss": 3.0725, "learning_rate": 4.839620231089626e-06, "epoch": 2.5996596545856114, "total_flos": 2480523418636185600, "step": 646200 }, { "loss": 3.0325, "learning_rate": 4.838809105693695e-06, "epoch": 2.600061954129806, "total_flos": 2480905881189888000, "step": 646300 }, { "loss": 3.01, "learning_rate": 4.837997980297764e-06, "epoch": 2.6004642536740006, "total_flos": 2481287732950732800, "step": 646400 }, { "loss": 3.015, "learning_rate": 4.837186854901834e-06, "epoch": 2.600866553218195, "total_flos": 2481672080995430400, "step": 646500 }, { "loss": 3.0325, "learning_rate": 4.8363757295059035e-06, "epoch": 2.6012688527623897, "total_flos": 2482058760675471360, "step": 646600 }, { "loss": 3.04, "learning_rate": 4.835564604109973e-06, "epoch": 2.6016711523065843, "total_flos": 2482435917298176000, "step": 646700 }, { "loss": 3.025, "learning_rate": 4.834753478714042e-06, "epoch": 2.602073451850779, "total_flos": 2482826182066728960, "step": 646800 }, { "loss": 3.0525, "learning_rate": 4.833942353318112e-06, "epoch": 2.602475751394974, "total_flos": 2483203476781731840, "step": 646900 }, { "loss": 2.99, "learning_rate": 4.833131227922181e-06, "epoch": 2.602878050939168, "total_flos": 2483578912561950720, "step": 647000 }, { "loss": 2.9325, "learning_rate": 4.83232010252625e-06, "epoch": 2.603280350483363, "total_flos": 2483961736280125440, "step": 647100 }, { "loss": 3.015, "learning_rate": 4.8315089771303195e-06, "epoch": 2.6036826500275576, "total_flos": 2484357089218744320, "step": 647200 }, { "loss": 2.965, "learning_rate": 4.8306978517343895e-06, "epoch": 2.604084949571752, "total_flos": 2484735807346667520, "step": 647300 }, { "loss": 2.985, "learning_rate": 4.829886726338459e-06, "epoch": 2.6044872491159468, "total_flos": 2485133847773859840, "step": 647400 }, { "loss": 3.0075, "learning_rate": 4.829075600942528e-06, "epoch": 2.6048895486601413, "total_flos": 2485506133987430400, "step": 647500 }, { "loss": 2.99, "learning_rate": 4.828264475546597e-06, "epoch": 2.605291848204336, "total_flos": 2485902103030149120, "step": 647600 }, { "loss": 3.0275, "learning_rate": 4.827453350150667e-06, "epoch": 2.6056941477485305, "total_flos": 2486280311278817280, "step": 647700 }, { "loss": 2.9875, "learning_rate": 4.826642224754736e-06, "epoch": 2.6060964472927255, "total_flos": 2486671447091097600, "step": 647800 }, { "loss": 2.945, "learning_rate": 4.8258310993588056e-06, "epoch": 2.6064987468369196, "total_flos": 2487061706548408320, "step": 647900 }, { "loss": 2.995, "learning_rate": 4.825019973962875e-06, "epoch": 2.6069010463811146, "total_flos": 2487436568714465280, "step": 648000 }, { "loss": 2.9675, "learning_rate": 4.824208848566945e-06, "epoch": 2.607303345925309, "total_flos": 2487824942680780800, "step": 648100 }, { "loss": 2.9475, "learning_rate": 4.823397723171014e-06, "epoch": 2.607705645469504, "total_flos": 2488200909585223680, "step": 648200 }, { "loss": 2.9725, "learning_rate": 4.822586597775083e-06, "epoch": 2.6081079450136984, "total_flos": 2488587429927997440, "step": 648300 }, { "loss": 2.9525, "learning_rate": 4.821775472379152e-06, "epoch": 2.608510244557893, "total_flos": 2488977620339159040, "step": 648400 }, { "loss": 3.0, "learning_rate": 4.8209643469832224e-06, "epoch": 2.6089125441020875, "total_flos": 2489379474238279680, "step": 648500 }, { "loss": 3.015, "learning_rate": 4.820153221587292e-06, "epoch": 2.609314843646282, "total_flos": 2489760991390863360, "step": 648600 }, { "loss": 2.945, "learning_rate": 4.819342096191361e-06, "epoch": 2.6097171431904767, "total_flos": 2490137287592325120, "step": 648700 }, { "loss": 2.995, "learning_rate": 4.81853097079543e-06, "epoch": 2.6101194427346712, "total_flos": 2490514592929812480, "step": 648800 }, { "loss": 3.03, "learning_rate": 4.8177198453995e-06, "epoch": 2.6105217422788662, "total_flos": 2490898489518919680, "step": 648900 }, { "loss": 3.0175, "learning_rate": 4.816908720003569e-06, "epoch": 2.6109240418230604, "total_flos": 2491303848837918720, "step": 649000 }, { "loss": 2.9825, "learning_rate": 4.8160975946076385e-06, "epoch": 2.6113263413672554, "total_flos": 2491681557829816320, "step": 649100 }, { "loss": 3.0025, "learning_rate": 4.815286469211708e-06, "epoch": 2.61172864091145, "total_flos": 2492069597187870720, "step": 649200 }, { "loss": 2.95, "learning_rate": 4.814475343815778e-06, "epoch": 2.6121309404556445, "total_flos": 2492445165749145600, "step": 649300 }, { "loss": 2.9675, "learning_rate": 4.813664218419847e-06, "epoch": 2.612533239999839, "total_flos": 2492819061269114880, "step": 649400 }, { "loss": 3.03, "learning_rate": 4.812853093023916e-06, "epoch": 2.6129355395440337, "total_flos": 2493189015847342080, "step": 649500 }, { "loss": 3.0325, "learning_rate": 4.812041967627985e-06, "epoch": 2.6133378390882283, "total_flos": 2493578080275148800, "step": 649600 }, { "loss": 2.995, "learning_rate": 4.811230842232055e-06, "epoch": 2.613740138632423, "total_flos": 2493953569167790080, "step": 649700 }, { "loss": 2.9675, "learning_rate": 4.8104197168361245e-06, "epoch": 2.6141424381766174, "total_flos": 2494342288364851200, "step": 649800 }, { "loss": 2.9975, "learning_rate": 4.809608591440194e-06, "epoch": 2.614544737720812, "total_flos": 2494733822520299520, "step": 649900 }, { "loss": 2.9775, "learning_rate": 4.808797466044263e-06, "epoch": 2.614947037265007, "total_flos": 2495119939208663040, "step": 650000 }, { "loss": 2.9875, "learning_rate": 4.807986340648333e-06, "epoch": 2.615349336809201, "total_flos": 2495508286618767360, "step": 650100 }, { "loss": 2.9875, "learning_rate": 4.807175215252403e-06, "epoch": 2.615751636353396, "total_flos": 2495872935265996800, "step": 650200 }, { "loss": 2.96, "learning_rate": 4.806364089856471e-06, "epoch": 2.6161539358975907, "total_flos": 2496234174095708160, "step": 650300 }, { "loss": 2.99, "learning_rate": 4.805552964460541e-06, "epoch": 2.6165562354417853, "total_flos": 2496614538708725760, "step": 650400 }, { "loss": 3.0075, "learning_rate": 4.804741839064611e-06, "epoch": 2.61695853498598, "total_flos": 2497015776503746560, "step": 650500 }, { "loss": 3.0375, "learning_rate": 4.803930713668681e-06, "epoch": 2.6173608345301744, "total_flos": 2497398308103598080, "step": 650600 }, { "loss": 2.9825, "learning_rate": 4.803119588272749e-06, "epoch": 2.617763134074369, "total_flos": 2497781546098667520, "step": 650700 }, { "loss": 3.0, "learning_rate": 4.802308462876819e-06, "epoch": 2.6181654336185636, "total_flos": 2498140925993594880, "step": 650800 }, { "loss": 3.0, "learning_rate": 4.801497337480888e-06, "epoch": 2.6185677331627586, "total_flos": 2498540835978055680, "step": 650900 }, { "loss": 2.985, "learning_rate": 4.800686212084958e-06, "epoch": 2.6189700327069527, "total_flos": 2498928806289960960, "step": 651000 }, { "loss": 2.9875, "learning_rate": 4.799875086689027e-06, "epoch": 2.6193723322511477, "total_flos": 2499294692456632320, "step": 651100 }, { "loss": 2.9975, "learning_rate": 4.799063961293097e-06, "epoch": 2.6197746317953423, "total_flos": 2499680920681082880, "step": 651200 }, { "loss": 3.0325, "learning_rate": 4.798252835897166e-06, "epoch": 2.620176931339537, "total_flos": 2500081345856040960, "step": 651300 }, { "loss": 2.97, "learning_rate": 4.797441710501236e-06, "epoch": 2.6205792308837315, "total_flos": 2500475668413665280, "step": 651400 }, { "loss": 2.985, "learning_rate": 4.796630585105304e-06, "epoch": 2.620981530427926, "total_flos": 2500860781277245440, "step": 651500 }, { "loss": 3.0625, "learning_rate": 4.795819459709374e-06, "epoch": 2.6213838299721206, "total_flos": 2501254296526049280, "step": 651600 }, { "loss": 2.94, "learning_rate": 4.7950083343134435e-06, "epoch": 2.621786129516315, "total_flos": 2501644800300503040, "step": 651700 }, { "loss": 2.9975, "learning_rate": 4.7941972089175135e-06, "epoch": 2.6221884290605098, "total_flos": 2502031527781724160, "step": 651800 }, { "loss": 2.995, "learning_rate": 4.793386083521583e-06, "epoch": 2.6225907286047043, "total_flos": 2502417809118597120, "step": 651900 }, { "loss": 3.02, "learning_rate": 4.792574958125652e-06, "epoch": 2.6229930281488993, "total_flos": 2502813948121067520, "step": 652000 }, { "loss": 3.03, "learning_rate": 4.791763832729721e-06, "epoch": 2.6233953276930935, "total_flos": 2503185708521656320, "step": 652100 }, { "loss": 2.9825, "learning_rate": 4.790952707333791e-06, "epoch": 2.6237976272372885, "total_flos": 2503576350388408320, "step": 652200 }, { "loss": 3.0, "learning_rate": 4.79014158193786e-06, "epoch": 2.624199926781483, "total_flos": 2503954261207511040, "step": 652300 }, { "loss": 3.05, "learning_rate": 4.7893304565419295e-06, "epoch": 2.6246022263256776, "total_flos": 2504334620509286400, "step": 652400 }, { "loss": 2.9425, "learning_rate": 4.788519331145999e-06, "epoch": 2.625004525869872, "total_flos": 2504729596349706240, "step": 652500 }, { "loss": 2.975, "learning_rate": 4.787708205750069e-06, "epoch": 2.625406825414067, "total_flos": 2505138360174981120, "step": 652600 }, { "loss": 3.005, "learning_rate": 4.786897080354138e-06, "epoch": 2.6258091249582614, "total_flos": 2505525905587507200, "step": 652700 }, { "loss": 3.0025, "learning_rate": 4.786085954958207e-06, "epoch": 2.626211424502456, "total_flos": 2505917317584384000, "step": 652800 }, { "loss": 2.96, "learning_rate": 4.785274829562276e-06, "epoch": 2.6266137240466505, "total_flos": 2506299211835166720, "step": 652900 }, { "loss": 2.9675, "learning_rate": 4.784463704166346e-06, "epoch": 2.627016023590845, "total_flos": 2506685503794524160, "step": 653000 }, { "loss": 2.965, "learning_rate": 4.783652578770416e-06, "epoch": 2.62741832313504, "total_flos": 2507058039636480000, "step": 653100 }, { "loss": 3.0125, "learning_rate": 4.782841453374485e-06, "epoch": 2.6278206226792347, "total_flos": 2507434309281730560, "step": 653200 }, { "loss": 2.965, "learning_rate": 4.782030327978554e-06, "epoch": 2.6282229222234292, "total_flos": 2507811906737541120, "step": 653300 }, { "loss": 3.04, "learning_rate": 4.781219202582624e-06, "epoch": 2.628625221767624, "total_flos": 2508203154085908480, "step": 653400 }, { "loss": 2.93, "learning_rate": 4.780408077186693e-06, "epoch": 2.6290275213118184, "total_flos": 2508568471949660160, "step": 653500 }, { "loss": 2.99, "learning_rate": 4.7795969517907624e-06, "epoch": 2.629429820856013, "total_flos": 2508947051985285120, "step": 653600 }, { "loss": 2.9925, "learning_rate": 4.778785826394832e-06, "epoch": 2.6298321204002075, "total_flos": 2509328298264514560, "step": 653700 }, { "loss": 2.995, "learning_rate": 4.777974700998902e-06, "epoch": 2.630234419944402, "total_flos": 2509717798214184960, "step": 653800 }, { "loss": 2.98, "learning_rate": 4.777163575602971e-06, "epoch": 2.6306367194885967, "total_flos": 2510100027073228800, "step": 653900 }, { "loss": 3.03, "learning_rate": 4.77635245020704e-06, "epoch": 2.6310390190327917, "total_flos": 2510467798730895360, "step": 654000 }, { "loss": 3.0625, "learning_rate": 4.775541324811109e-06, "epoch": 2.631441318576986, "total_flos": 2510862509009203200, "step": 654100 }, { "loss": 2.9825, "learning_rate": 4.774730199415179e-06, "epoch": 2.631843618121181, "total_flos": 2511244254545203200, "step": 654200 }, { "loss": 3.02, "learning_rate": 4.7739190740192485e-06, "epoch": 2.6322459176653754, "total_flos": 2511626807390023680, "step": 654300 }, { "loss": 2.9975, "learning_rate": 4.773107948623318e-06, "epoch": 2.63264821720957, "total_flos": 2512008457323663360, "step": 654400 }, { "loss": 3.0025, "learning_rate": 4.772296823227387e-06, "epoch": 2.6330505167537646, "total_flos": 2512402843616194560, "step": 654500 }, { "loss": 2.9875, "learning_rate": 4.771485697831457e-06, "epoch": 2.633452816297959, "total_flos": 2512794186566922240, "step": 654600 }, { "loss": 2.9575, "learning_rate": 4.770674572435526e-06, "epoch": 2.6338551158421537, "total_flos": 2513201744740208640, "step": 654700 }, { "loss": 2.9925, "learning_rate": 4.769863447039595e-06, "epoch": 2.6342574153863483, "total_flos": 2513580643450368000, "step": 654800 }, { "loss": 3.0375, "learning_rate": 4.769052321643665e-06, "epoch": 2.634659714930543, "total_flos": 2513956944963072000, "step": 654900 }, { "loss": 3.0675, "learning_rate": 4.7682411962477346e-06, "epoch": 2.6350620144747374, "total_flos": 2514331939910184960, "step": 655000 }, { "loss": 2.9825, "learning_rate": 4.767430070851804e-06, "epoch": 2.6354643140189324, "total_flos": 2514708671633510400, "step": 655100 }, { "loss": 3.005, "learning_rate": 4.766618945455873e-06, "epoch": 2.6358666135631266, "total_flos": 2515096514475601920, "step": 655200 }, { "loss": 2.9825, "learning_rate": 4.765807820059943e-06, "epoch": 2.6362689131073216, "total_flos": 2515494384943042560, "step": 655300 }, { "loss": 3.0, "learning_rate": 4.764996694664012e-06, "epoch": 2.636671212651516, "total_flos": 2515869109016801280, "step": 655400 }, { "loss": 2.965, "learning_rate": 4.764185569268081e-06, "epoch": 2.6370735121957107, "total_flos": 2516241921043353600, "step": 655500 }, { "loss": 3.045, "learning_rate": 4.763374443872151e-06, "epoch": 2.6374758117399053, "total_flos": 2516616002456801280, "step": 655600 }, { "loss": 3.0175, "learning_rate": 4.762563318476221e-06, "epoch": 2.6378781112841, "total_flos": 2516992277413294080, "step": 655700 }, { "loss": 3.015, "learning_rate": 4.76175219308029e-06, "epoch": 2.6382804108282945, "total_flos": 2517372705761218560, "step": 655800 }, { "loss": 3.0375, "learning_rate": 4.760941067684359e-06, "epoch": 2.638682710372489, "total_flos": 2517765079092940800, "step": 655900 }, { "loss": 2.99, "learning_rate": 4.760129942288428e-06, "epoch": 2.6390850099166836, "total_flos": 2518143558214963200, "step": 656000 }, { "loss": 2.9825, "learning_rate": 4.759318816892498e-06, "epoch": 2.639487309460878, "total_flos": 2518514505995489280, "step": 656100 }, { "loss": 2.915, "learning_rate": 4.7585076914965675e-06, "epoch": 2.639889609005073, "total_flos": 2518888815792353280, "step": 656200 }, { "loss": 2.985, "learning_rate": 4.757696566100637e-06, "epoch": 2.6402919085492678, "total_flos": 2519279728532459520, "step": 656300 }, { "loss": 2.975, "learning_rate": 4.756885440704706e-06, "epoch": 2.6406942080934623, "total_flos": 2519678151369093120, "step": 656400 }, { "loss": 2.985, "learning_rate": 4.756074315308776e-06, "epoch": 2.641096507637657, "total_flos": 2520055026495959040, "step": 656500 }, { "loss": 3.0375, "learning_rate": 4.755263189912845e-06, "epoch": 2.6414988071818515, "total_flos": 2520444876987617280, "step": 656600 }, { "loss": 2.955, "learning_rate": 4.754452064516914e-06, "epoch": 2.641901106726046, "total_flos": 2520829559640576000, "step": 656700 }, { "loss": 3.04, "learning_rate": 4.7536409391209835e-06, "epoch": 2.6423034062702406, "total_flos": 2521204421806632960, "step": 656800 }, { "loss": 3.04, "learning_rate": 4.7528298137250535e-06, "epoch": 2.642705705814435, "total_flos": 2521577005449768960, "step": 656900 }, { "loss": 2.935, "learning_rate": 4.752018688329123e-06, "epoch": 2.6431080053586298, "total_flos": 2521972369010872320, "step": 657000 }, { "loss": 3.0275, "learning_rate": 4.751207562933192e-06, "epoch": 2.643510304902825, "total_flos": 2522345271328542720, "step": 657100 }, { "loss": 3.0075, "learning_rate": 4.750396437537261e-06, "epoch": 2.643912604447019, "total_flos": 2522731818227527680, "step": 657200 }, { "loss": 2.975, "learning_rate": 4.749585312141331e-06, "epoch": 2.644314903991214, "total_flos": 2523103275887308800, "step": 657300 }, { "loss": 3.0825, "learning_rate": 4.7487741867454e-06, "epoch": 2.6447172035354085, "total_flos": 2523485865910824960, "step": 657400 }, { "loss": 2.9675, "learning_rate": 4.7479630613494695e-06, "epoch": 2.645119503079603, "total_flos": 2523876157235589120, "step": 657500 }, { "loss": 3.015, "learning_rate": 4.747151935953539e-06, "epoch": 2.6455218026237977, "total_flos": 2524259145602273280, "step": 657600 }, { "loss": 3.0025, "learning_rate": 4.746340810557609e-06, "epoch": 2.6459241021679922, "total_flos": 2524642771318026240, "step": 657700 }, { "loss": 3.01, "learning_rate": 4.745529685161678e-06, "epoch": 2.646326401712187, "total_flos": 2525044922646712320, "step": 657800 }, { "loss": 2.99, "learning_rate": 4.744718559765747e-06, "epoch": 2.6467287012563814, "total_flos": 2525430168291348480, "step": 657900 }, { "loss": 2.99, "learning_rate": 4.743907434369816e-06, "epoch": 2.647131000800576, "total_flos": 2525823407355555840, "step": 658000 }, { "loss": 2.97, "learning_rate": 4.743096308973886e-06, "epoch": 2.6475333003447705, "total_flos": 2526221777079767040, "step": 658100 }, { "loss": 2.95, "learning_rate": 4.742285183577956e-06, "epoch": 2.6479355998889655, "total_flos": 2526603565105704960, "step": 658200 }, { "loss": 2.9775, "learning_rate": 4.741474058182025e-06, "epoch": 2.6483378994331597, "total_flos": 2526992289614008320, "step": 658300 }, { "loss": 3.025, "learning_rate": 4.740662932786094e-06, "epoch": 2.6487401989773547, "total_flos": 2527376765128519680, "step": 658400 }, { "loss": 2.98, "learning_rate": 4.739851807390164e-06, "epoch": 2.6491424985215493, "total_flos": 2527766031383531520, "step": 658500 }, { "loss": 2.9925, "learning_rate": 4.739040681994233e-06, "epoch": 2.649544798065744, "total_flos": 2528146539400089600, "step": 658600 }, { "loss": 2.96, "learning_rate": 4.7382295565983024e-06, "epoch": 2.6499470976099384, "total_flos": 2528514677533470720, "step": 658700 }, { "loss": 3.025, "learning_rate": 4.737418431202372e-06, "epoch": 2.650349397154133, "total_flos": 2528899774463324160, "step": 658800 }, { "loss": 3.0175, "learning_rate": 4.736607305806442e-06, "epoch": 2.6507516966983276, "total_flos": 2529276330915655680, "step": 658900 }, { "loss": 2.975, "learning_rate": 4.735796180410511e-06, "epoch": 2.651153996242522, "total_flos": 2529660105346191360, "step": 659000 }, { "loss": 3.0175, "learning_rate": 4.73498505501458e-06, "epoch": 2.651556295786717, "total_flos": 2530039126214922240, "step": 659100 }, { "loss": 3.0175, "learning_rate": 4.734173929618649e-06, "epoch": 2.6519585953309113, "total_flos": 2530431913823539200, "step": 659200 }, { "loss": 3.0, "learning_rate": 4.733362804222719e-06, "epoch": 2.6523608948751063, "total_flos": 2530818083624325120, "step": 659300 }, { "loss": 2.975, "learning_rate": 4.7325516788267885e-06, "epoch": 2.652763194419301, "total_flos": 2531198379191193600, "step": 659400 }, { "loss": 2.9925, "learning_rate": 4.731740553430858e-06, "epoch": 2.6531654939634954, "total_flos": 2531602851532738560, "step": 659500 }, { "loss": 2.9925, "learning_rate": 4.730929428034928e-06, "epoch": 2.65356779350769, "total_flos": 2532007228271923200, "step": 659600 }, { "loss": 3.0325, "learning_rate": 4.730118302638997e-06, "epoch": 2.6539700930518846, "total_flos": 2532392771346124800, "step": 659700 }, { "loss": 3.05, "learning_rate": 4.729307177243066e-06, "epoch": 2.654372392596079, "total_flos": 2532762640944476160, "step": 659800 }, { "loss": 2.96, "learning_rate": 4.728496051847135e-06, "epoch": 2.6547746921402737, "total_flos": 2533145108809420800, "step": 659900 }, { "loss": 3.0075, "learning_rate": 4.727684926451205e-06, "epoch": 2.6551769916844683, "total_flos": 2533523938473431040, "step": 660000 }, { "loss": 3.0025, "learning_rate": 4.7268738010552746e-06, "epoch": 2.655579291228663, "total_flos": 2533929860784107520, "step": 660100 }, { "loss": 2.97, "learning_rate": 4.726062675659344e-06, "epoch": 2.655981590772858, "total_flos": 2534328602295275520, "step": 660200 }, { "loss": 3.0, "learning_rate": 4.725251550263413e-06, "epoch": 2.656383890317052, "total_flos": 2534722951409111040, "step": 660300 }, { "loss": 3.0475, "learning_rate": 4.724440424867483e-06, "epoch": 2.656786189861247, "total_flos": 2535101446464860160, "step": 660400 }, { "loss": 2.98, "learning_rate": 4.723629299471552e-06, "epoch": 2.6571884894054416, "total_flos": 2535469775802961920, "step": 660500 }, { "loss": 2.9775, "learning_rate": 4.722818174075622e-06, "epoch": 2.657590788949636, "total_flos": 2535854400032256000, "step": 660600 }, { "loss": 3.0125, "learning_rate": 4.722007048679691e-06, "epoch": 2.6579930884938308, "total_flos": 2536249354627706880, "step": 660700 }, { "loss": 2.9525, "learning_rate": 4.721195923283761e-06, "epoch": 2.6583953880380253, "total_flos": 2536625125016186880, "step": 660800 }, { "loss": 2.9725, "learning_rate": 4.72038479788783e-06, "epoch": 2.65879768758222, "total_flos": 2537020860364247040, "step": 660900 }, { "loss": 3.04, "learning_rate": 4.7195736724919e-06, "epoch": 2.6591999871264145, "total_flos": 2537403450387763200, "step": 661000 }, { "loss": 2.99, "learning_rate": 4.718762547095968e-06, "epoch": 2.659602286670609, "total_flos": 2537781047843573760, "step": 661100 }, { "loss": 3.0075, "learning_rate": 4.717951421700038e-06, "epoch": 2.6600045862148036, "total_flos": 2538178403120517120, "step": 661200 }, { "loss": 2.9875, "learning_rate": 4.7171402963041075e-06, "epoch": 2.6604068857589986, "total_flos": 2538546658101227520, "step": 661300 }, { "loss": 2.96, "learning_rate": 4.7163291709081775e-06, "epoch": 2.660809185303193, "total_flos": 2538925068177100800, "step": 661400 }, { "loss": 2.95, "learning_rate": 4.715518045512246e-06, "epoch": 2.661211484847388, "total_flos": 2539327931212247040, "step": 661500 }, { "loss": 3.0075, "learning_rate": 4.714706920116316e-06, "epoch": 2.6616137843915824, "total_flos": 2539703366992465920, "step": 661600 }, { "loss": 2.98, "learning_rate": 4.713895794720385e-06, "epoch": 2.662016083935777, "total_flos": 2540104918150778880, "step": 661700 }, { "loss": 3.0125, "learning_rate": 4.713084669324455e-06, "epoch": 2.6624183834799715, "total_flos": 2540483471630192640, "step": 661800 }, { "loss": 2.965, "learning_rate": 4.7122735439285235e-06, "epoch": 2.662820683024166, "total_flos": 2540854892111278080, "step": 661900 }, { "loss": 2.9925, "learning_rate": 4.7114624185325935e-06, "epoch": 2.6632229825683607, "total_flos": 2541237646783303680, "step": 662000 }, { "loss": 3.0275, "learning_rate": 4.710651293136663e-06, "epoch": 2.6636252821125552, "total_flos": 2541619944688496640, "step": 662100 }, { "loss": 3.0, "learning_rate": 4.709840167740733e-06, "epoch": 2.6640275816567502, "total_flos": 2541999050537103360, "step": 662200 }, { "loss": 2.9775, "learning_rate": 4.709029042344801e-06, "epoch": 2.6644298812009444, "total_flos": 2542380780139376640, "step": 662300 }, { "loss": 2.9725, "learning_rate": 4.708217916948871e-06, "epoch": 2.6648321807451394, "total_flos": 2542770349135196160, "step": 662400 }, { "loss": 2.9975, "learning_rate": 4.70740679155294e-06, "epoch": 2.665234480289334, "total_flos": 2543146124834918400, "step": 662500 }, { "loss": 2.9975, "learning_rate": 4.70659566615701e-06, "epoch": 2.6656367798335285, "total_flos": 2543525469689425920, "step": 662600 }, { "loss": 3.03, "learning_rate": 4.705784540761079e-06, "epoch": 2.666039079377723, "total_flos": 2543907427675115520, "step": 662700 }, { "loss": 2.965, "learning_rate": 4.704973415365149e-06, "epoch": 2.6664413789219177, "total_flos": 2544293682455777280, "step": 662800 }, { "loss": 3.0375, "learning_rate": 4.704162289969218e-06, "epoch": 2.6668436784661123, "total_flos": 2544665761530900480, "step": 662900 }, { "loss": 2.995, "learning_rate": 4.703351164573288e-06, "epoch": 2.667245978010307, "total_flos": 2545053413168271360, "step": 663000 }, { "loss": 2.975, "learning_rate": 4.702540039177357e-06, "epoch": 2.6676482775545014, "total_flos": 2545439784796262400, "step": 663100 }, { "loss": 2.9425, "learning_rate": 4.701728913781426e-06, "epoch": 2.668050577098696, "total_flos": 2545807184666972160, "step": 663200 }, { "loss": 3.0375, "learning_rate": 4.700917788385496e-06, "epoch": 2.668452876642891, "total_flos": 2546179460258058240, "step": 663300 }, { "loss": 2.9525, "learning_rate": 4.700106662989566e-06, "epoch": 2.668855176187085, "total_flos": 2546554747323494400, "step": 663400 }, { "loss": 2.9925, "learning_rate": 4.699295537593635e-06, "epoch": 2.66925747573128, "total_flos": 2546947784560496640, "step": 663500 }, { "loss": 3.0175, "learning_rate": 4.698484412197704e-06, "epoch": 2.6696597752754747, "total_flos": 2547338681366876160, "step": 663600 }, { "loss": 3.01, "learning_rate": 4.697673286801773e-06, "epoch": 2.6700620748196693, "total_flos": 2547725998395985920, "step": 663700 }, { "loss": 2.99, "learning_rate": 4.696862161405843e-06, "epoch": 2.670464374363864, "total_flos": 2548115025645096960, "step": 663800 }, { "loss": 2.9775, "learning_rate": 4.6960510360099125e-06, "epoch": 2.6708666739080584, "total_flos": 2548495422125568000, "step": 663900 }, { "loss": 3.0175, "learning_rate": 4.695239910613982e-06, "epoch": 2.671268973452253, "total_flos": 2548879069086289920, "step": 664000 }, { "loss": 2.99, "learning_rate": 4.694428785218051e-06, "epoch": 2.6716712729964476, "total_flos": 2549279345546465280, "step": 664100 }, { "loss": 2.96, "learning_rate": 4.693617659822121e-06, "epoch": 2.672073572540642, "total_flos": 2549673317562101760, "step": 664200 }, { "loss": 3.02, "learning_rate": 4.69280653442619e-06, "epoch": 2.6724758720848367, "total_flos": 2550061388787609600, "step": 664300 }, { "loss": 2.9875, "learning_rate": 4.691995409030259e-06, "epoch": 2.6728781716290317, "total_flos": 2550447080576593920, "step": 664400 }, { "loss": 2.9575, "learning_rate": 4.691184283634329e-06, "epoch": 2.6732804711732263, "total_flos": 2550844924487823360, "step": 664500 }, { "loss": 2.9825, "learning_rate": 4.6903731582383985e-06, "epoch": 2.673682770717421, "total_flos": 2551234004849356800, "step": 664600 }, { "loss": 3.0425, "learning_rate": 4.689562032842468e-06, "epoch": 2.6740850702616155, "total_flos": 2551625576183500800, "step": 664700 }, { "loss": 2.975, "learning_rate": 4.688750907446537e-06, "epoch": 2.67448736980581, "total_flos": 2552012999437455360, "step": 664800 }, { "loss": 3.06, "learning_rate": 4.687939782050607e-06, "epoch": 2.6748896693500046, "total_flos": 2552404974426009600, "step": 664900 }, { "loss": 2.955, "learning_rate": 4.687128656654676e-06, "epoch": 2.675291968894199, "total_flos": 2552778535337717760, "step": 665000 }, { "loss": 2.995, "learning_rate": 4.686317531258745e-06, "epoch": 2.6756942684383938, "total_flos": 2553164970700615680, "step": 665100 }, { "loss": 3.0575, "learning_rate": 4.6855064058628146e-06, "epoch": 2.6760965679825883, "total_flos": 2553543614471147520, "step": 665200 }, { "loss": 2.98, "learning_rate": 4.684695280466885e-06, "epoch": 2.6764988675267833, "total_flos": 2553927128650813440, "step": 665300 }, { "loss": 2.985, "learning_rate": 4.683884155070954e-06, "epoch": 2.6769011670709775, "total_flos": 2554316543620608000, "step": 665400 }, { "loss": 2.985, "learning_rate": 4.683073029675023e-06, "epoch": 2.6773034666151725, "total_flos": 2554709155958231040, "step": 665500 }, { "loss": 3.0025, "learning_rate": 4.682261904279092e-06, "epoch": 2.677705766159367, "total_flos": 2555094088239575040, "step": 665600 }, { "loss": 3.0325, "learning_rate": 4.681450778883162e-06, "epoch": 2.6781080657035616, "total_flos": 2555460319636992000, "step": 665700 }, { "loss": 3.025, "learning_rate": 4.6806396534872314e-06, "epoch": 2.678510365247756, "total_flos": 2555835898820751360, "step": 665800 }, { "loss": 3.0425, "learning_rate": 4.679828528091301e-06, "epoch": 2.678912664791951, "total_flos": 2556234847470366720, "step": 665900 }, { "loss": 2.9675, "learning_rate": 4.67901740269537e-06, "epoch": 2.6793149643361454, "total_flos": 2556615254573322240, "step": 666000 }, { "loss": 3.0, "learning_rate": 4.67820627729944e-06, "epoch": 2.67971726388034, "total_flos": 2556989320053043200, "step": 666100 }, { "loss": 3.0125, "learning_rate": 4.677395151903509e-06, "epoch": 2.6801195634245345, "total_flos": 2557369084495687680, "step": 666200 }, { "loss": 3.01, "learning_rate": 4.676584026507578e-06, "epoch": 2.680521862968729, "total_flos": 2557764352454430720, "step": 666300 }, { "loss": 2.9875, "learning_rate": 4.6757729011116475e-06, "epoch": 2.680924162512924, "total_flos": 2558134498237378560, "step": 666400 }, { "loss": 3.02, "learning_rate": 4.6749617757157175e-06, "epoch": 2.681326462057118, "total_flos": 2558514719446855680, "step": 666500 }, { "loss": 3.0, "learning_rate": 4.674150650319787e-06, "epoch": 2.6817287616013132, "total_flos": 2558899603927019520, "step": 666600 }, { "loss": 3.01, "learning_rate": 4.673339524923856e-06, "epoch": 2.682131061145508, "total_flos": 2559288880804515840, "step": 666700 }, { "loss": 2.9775, "learning_rate": 4.672528399527925e-06, "epoch": 2.6825333606897024, "total_flos": 2559679772299653120, "step": 666800 }, { "loss": 3.035, "learning_rate": 4.671717274131995e-06, "epoch": 2.682935660233897, "total_flos": 2560055585178071040, "step": 666900 }, { "loss": 2.985, "learning_rate": 4.670906148736064e-06, "epoch": 2.6833379597780915, "total_flos": 2560433129521459200, "step": 667000 }, { "loss": 3.005, "learning_rate": 4.6700950233401335e-06, "epoch": 2.683740259322286, "total_flos": 2560812761183047680, "step": 667100 }, { "loss": 2.94, "learning_rate": 4.669283897944203e-06, "epoch": 2.6841425588664807, "total_flos": 2561206690708746240, "step": 667200 }, { "loss": 2.965, "learning_rate": 4.668472772548273e-06, "epoch": 2.6845448584106757, "total_flos": 2561582546077102080, "step": 667300 }, { "loss": 2.9575, "learning_rate": 4.667661647152342e-06, "epoch": 2.68494715795487, "total_flos": 2561962777909063680, "step": 667400 }, { "loss": 2.935, "learning_rate": 4.666850521756411e-06, "epoch": 2.685349457499065, "total_flos": 2562344640292392960, "step": 667500 }, { "loss": 2.9675, "learning_rate": 4.66603939636048e-06, "epoch": 2.6857517570432594, "total_flos": 2562746632283811840, "step": 667600 }, { "loss": 2.9875, "learning_rate": 4.66522827096455e-06, "epoch": 2.686154056587454, "total_flos": 2563134092716462080, "step": 667700 }, { "loss": 2.9975, "learning_rate": 4.66441714556862e-06, "epoch": 2.6865563561316486, "total_flos": 2563520485589422080, "step": 667800 }, { "loss": 2.9625, "learning_rate": 4.663606020172689e-06, "epoch": 2.686958655675843, "total_flos": 2563893876541378560, "step": 667900 }, { "loss": 3.0225, "learning_rate": 4.662794894776758e-06, "epoch": 2.6873609552200377, "total_flos": 2564274597007626240, "step": 668000 }, { "loss": 3.035, "learning_rate": 4.661983769380828e-06, "epoch": 2.6877632547642323, "total_flos": 2564669689695375360, "step": 668100 }, { "loss": 2.9925, "learning_rate": 4.661172643984897e-06, "epoch": 2.688165554308427, "total_flos": 2565047722673049600, "step": 668200 }, { "loss": 3.025, "learning_rate": 4.660361518588966e-06, "epoch": 2.6885678538526214, "total_flos": 2565431783910666240, "step": 668300 }, { "loss": 2.9675, "learning_rate": 4.659550393193036e-06, "epoch": 2.6889701533968164, "total_flos": 2565821092655616000, "step": 668400 }, { "loss": 2.99, "learning_rate": 4.658739267797106e-06, "epoch": 2.6893724529410106, "total_flos": 2566201297931366400, "step": 668500 }, { "loss": 2.9525, "learning_rate": 4.657928142401175e-06, "epoch": 2.6897747524852056, "total_flos": 2566602461368995840, "step": 668600 }, { "loss": 2.95, "learning_rate": 4.657117017005244e-06, "epoch": 2.6901770520294, "total_flos": 2566972415947223040, "step": 668700 }, { "loss": 3.0075, "learning_rate": 4.656305891609313e-06, "epoch": 2.6905793515735947, "total_flos": 2567343751448432640, "step": 668800 }, { "loss": 3.0475, "learning_rate": 4.655494766213383e-06, "epoch": 2.6909816511177893, "total_flos": 2567725890016358400, "step": 668900 }, { "loss": 3.015, "learning_rate": 4.6546836408174525e-06, "epoch": 2.691383950661984, "total_flos": 2568116340678389760, "step": 669000 }, { "loss": 2.9525, "learning_rate": 4.653872515421522e-06, "epoch": 2.6917862502061785, "total_flos": 2568507545536819200, "step": 669100 }, { "loss": 2.94, "learning_rate": 4.653061390025592e-06, "epoch": 2.692188549750373, "total_flos": 2568878604853432320, "step": 669200 }, { "loss": 2.9175, "learning_rate": 4.652250264629661e-06, "epoch": 2.6925908492945676, "total_flos": 2569274812902051840, "step": 669300 }, { "loss": 2.925, "learning_rate": 4.65143913923373e-06, "epoch": 2.692993148838762, "total_flos": 2569641724138475520, "step": 669400 }, { "loss": 2.945, "learning_rate": 4.650628013837799e-06, "epoch": 2.693395448382957, "total_flos": 2570025418900377600, "step": 669500 }, { "loss": 2.975, "learning_rate": 4.649816888441869e-06, "epoch": 2.6937977479271518, "total_flos": 2570412927134208000, "step": 669600 }, { "loss": 2.9225, "learning_rate": 4.6490057630459385e-06, "epoch": 2.6942000474713463, "total_flos": 2570779535629824000, "step": 669700 }, { "loss": 3.0175, "learning_rate": 4.648194637650008e-06, "epoch": 2.694602347015541, "total_flos": 2571175616208629760, "step": 669800 }, { "loss": 2.9775, "learning_rate": 4.647383512254077e-06, "epoch": 2.6950046465597355, "total_flos": 2571563416560783360, "step": 669900 }, { "loss": 3.01, "learning_rate": 4.646572386858147e-06, "epoch": 2.69540694610393, "total_flos": 2571949272998277120, "step": 670000 }, { "loss": 3.0275, "learning_rate": 4.645761261462216e-06, "epoch": 2.6958092456481246, "total_flos": 2572336659073536000, "step": 670100 }, { "loss": 2.97, "learning_rate": 4.644950136066285e-06, "epoch": 2.696211545192319, "total_flos": 2572720741556121600, "step": 670200 }, { "loss": 2.995, "learning_rate": 4.6441390106703546e-06, "epoch": 2.6966138447365138, "total_flos": 2573100272304107520, "step": 670300 }, { "loss": 2.965, "learning_rate": 4.643327885274425e-06, "epoch": 2.697016144280709, "total_flos": 2573489395155578880, "step": 670400 }, { "loss": 2.9875, "learning_rate": 4.642516759878494e-06, "epoch": 2.697418443824903, "total_flos": 2573868931214807040, "step": 670500 }, { "loss": 2.995, "learning_rate": 4.641705634482563e-06, "epoch": 2.697820743369098, "total_flos": 2574267635547279360, "step": 670600 }, { "loss": 2.97, "learning_rate": 4.640894509086632e-06, "epoch": 2.6982230429132925, "total_flos": 2574648313523589120, "step": 670700 }, { "loss": 2.955, "learning_rate": 4.640083383690702e-06, "epoch": 2.698625342457487, "total_flos": 2575032364138721280, "step": 670800 }, { "loss": 3.0675, "learning_rate": 4.6392722582947714e-06, "epoch": 2.6990276420016817, "total_flos": 2575426166194606080, "step": 670900 }, { "loss": 2.9825, "learning_rate": 4.638461132898841e-06, "epoch": 2.6994299415458762, "total_flos": 2575796928081653760, "step": 671000 }, { "loss": 2.9625, "learning_rate": 4.63765000750291e-06, "epoch": 2.699832241090071, "total_flos": 2576188929626419200, "step": 671100 }, { "loss": 2.975, "learning_rate": 4.63683888210698e-06, "epoch": 2.7002345406342654, "total_flos": 2576564901842104320, "step": 671200 }, { "loss": 2.9675, "learning_rate": 4.636027756711049e-06, "epoch": 2.70063684017846, "total_flos": 2576950933550592000, "step": 671300 }, { "loss": 2.9625, "learning_rate": 4.635216631315118e-06, "epoch": 2.7010391397226545, "total_flos": 2577327203195842560, "step": 671400 }, { "loss": 3.0025, "learning_rate": 4.6344055059191875e-06, "epoch": 2.7014414392668495, "total_flos": 2577705353020846080, "step": 671500 }, { "loss": 2.985, "learning_rate": 4.6335943805232575e-06, "epoch": 2.7018437388110437, "total_flos": 2578087443787591680, "step": 671600 }, { "loss": 2.965, "learning_rate": 4.632783255127327e-06, "epoch": 2.7022460383552387, "total_flos": 2578482748925030400, "step": 671700 }, { "loss": 2.99, "learning_rate": 4.631972129731397e-06, "epoch": 2.7026483378994333, "total_flos": 2578876142015262720, "step": 671800 }, { "loss": 3.005, "learning_rate": 4.631161004335465e-06, "epoch": 2.703050637443628, "total_flos": 2579262465842073600, "step": 671900 }, { "loss": 3.0, "learning_rate": 4.630349878939535e-06, "epoch": 2.7034529369878224, "total_flos": 2579642931368693760, "step": 672000 }, { "loss": 3.0275, "learning_rate": 4.629538753543604e-06, "epoch": 2.703855236532017, "total_flos": 2580042358030110720, "step": 672100 }, { "loss": 2.9825, "learning_rate": 4.628727628147674e-06, "epoch": 2.7042575360762116, "total_flos": 2580441370414632960, "step": 672200 }, { "loss": 2.985, "learning_rate": 4.627916502751743e-06, "epoch": 2.704659835620406, "total_flos": 2580823174374297600, "step": 672300 }, { "loss": 2.9725, "learning_rate": 4.627105377355813e-06, "epoch": 2.7050621351646007, "total_flos": 2581203756748247040, "step": 672400 }, { "loss": 2.955, "learning_rate": 4.626294251959882e-06, "epoch": 2.7054644347087953, "total_flos": 2581595407751024640, "step": 672500 }, { "loss": 2.995, "learning_rate": 4.625483126563952e-06, "epoch": 2.7058667342529903, "total_flos": 2581966722007265280, "step": 672600 }, { "loss": 2.9825, "learning_rate": 4.62467200116802e-06, "epoch": 2.706269033797185, "total_flos": 2582341323922452480, "step": 672700 }, { "loss": 2.99, "learning_rate": 4.62386087577209e-06, "epoch": 2.7066713333413794, "total_flos": 2582735094110883840, "step": 672800 }, { "loss": 2.955, "learning_rate": 4.62304975037616e-06, "epoch": 2.707073632885574, "total_flos": 2583121635698626560, "step": 672900 }, { "loss": 2.98, "learning_rate": 4.62223862498023e-06, "epoch": 2.7074759324297686, "total_flos": 2583501463876177920, "step": 673000 }, { "loss": 2.96, "learning_rate": 4.621427499584298e-06, "epoch": 2.707878231973963, "total_flos": 2583900874603868160, "step": 673100 }, { "loss": 3.0125, "learning_rate": 4.620616374188368e-06, "epoch": 2.7082805315181577, "total_flos": 2584272730606817280, "step": 673200 }, { "loss": 3.0125, "learning_rate": 4.619805248792437e-06, "epoch": 2.7086828310623523, "total_flos": 2584663786750464000, "step": 673300 }, { "loss": 2.9675, "learning_rate": 4.618994123396507e-06, "epoch": 2.709085130606547, "total_flos": 2585047991391621120, "step": 673400 }, { "loss": 3.005, "learning_rate": 4.618182998000576e-06, "epoch": 2.709487430150742, "total_flos": 2585430480501534720, "step": 673500 }, { "loss": 2.99, "learning_rate": 4.617371872604646e-06, "epoch": 2.709889729694936, "total_flos": 2585828042916925440, "step": 673600 }, { "loss": 2.96, "learning_rate": 4.616560747208715e-06, "epoch": 2.710292029239131, "total_flos": 2586215625508147200, "step": 673700 }, { "loss": 3.0075, "learning_rate": 4.615749621812785e-06, "epoch": 2.7106943287833256, "total_flos": 2586601513813094400, "step": 673800 }, { "loss": 2.975, "learning_rate": 4.614938496416854e-06, "epoch": 2.71109662832752, "total_flos": 2586989393833881600, "step": 673900 }, { "loss": 2.98, "learning_rate": 4.614127371020923e-06, "epoch": 2.7114989278717148, "total_flos": 2587378936273489920, "step": 674000 }, { "loss": 2.9825, "learning_rate": 4.613316245624993e-06, "epoch": 2.7119012274159093, "total_flos": 2587764394367815680, "step": 674100 }, { "loss": 2.9375, "learning_rate": 4.6125051202290625e-06, "epoch": 2.712303526960104, "total_flos": 2588140568410705920, "step": 674200 }, { "loss": 3.005, "learning_rate": 4.611693994833132e-06, "epoch": 2.7127058265042985, "total_flos": 2588508791523962880, "step": 674300 }, { "loss": 3.0125, "learning_rate": 4.610882869437201e-06, "epoch": 2.713108126048493, "total_flos": 2588901589755064320, "step": 674400 }, { "loss": 3.01, "learning_rate": 4.610071744041271e-06, "epoch": 2.7135104255926876, "total_flos": 2589279325303173120, "step": 674500 }, { "loss": 2.98, "learning_rate": 4.60926061864534e-06, "epoch": 2.7139127251368826, "total_flos": 2589662844794081280, "step": 674600 }, { "loss": 2.96, "learning_rate": 4.608449493249409e-06, "epoch": 2.7143150246810768, "total_flos": 2590044632820019200, "step": 674700 }, { "loss": 2.945, "learning_rate": 4.6076383678534785e-06, "epoch": 2.714717324225272, "total_flos": 2590424657513533440, "step": 674800 }, { "loss": 3.0075, "learning_rate": 4.606827242457549e-06, "epoch": 2.7151196237694664, "total_flos": 2590814513316433920, "step": 674900 }, { "loss": 3.055, "learning_rate": 4.606016117061618e-06, "epoch": 2.715521923313661, "total_flos": 2591201198307717120, "step": 675000 }, { "loss": 2.9975, "learning_rate": 4.605204991665687e-06, "epoch": 2.7159242228578555, "total_flos": 2591582922598748160, "step": 675100 }, { "loss": 3.015, "learning_rate": 4.604393866269756e-06, "epoch": 2.71632652240205, "total_flos": 2591979720195256320, "step": 675200 }, { "loss": 3.035, "learning_rate": 4.603582740873826e-06, "epoch": 2.7167288219462447, "total_flos": 2592358199317278720, "step": 675300 }, { "loss": 2.9425, "learning_rate": 4.602771615477895e-06, "epoch": 2.7171311214904392, "total_flos": 2592747577108377600, "step": 675400 }, { "loss": 2.9225, "learning_rate": 4.601960490081965e-06, "epoch": 2.7175334210346342, "total_flos": 2593115454990888960, "step": 675500 }, { "loss": 2.98, "learning_rate": 4.601149364686034e-06, "epoch": 2.7179357205788284, "total_flos": 2593498177795461120, "step": 675600 }, { "loss": 2.955, "learning_rate": 4.600338239290104e-06, "epoch": 2.7183380201230234, "total_flos": 2593874351838351360, "step": 675700 }, { "loss": 2.955, "learning_rate": 4.599527113894173e-06, "epoch": 2.718740319667218, "total_flos": 2594249638903787520, "step": 675800 }, { "loss": 2.9525, "learning_rate": 4.598715988498242e-06, "epoch": 2.7191426192114125, "total_flos": 2594633275242024960, "step": 675900 }, { "loss": 2.9925, "learning_rate": 4.5979048631023114e-06, "epoch": 2.719544918755607, "total_flos": 2595014372806471680, "step": 676000 }, { "loss": 3.0075, "learning_rate": 4.5970937377063815e-06, "epoch": 2.7199472182998017, "total_flos": 2595386919270912000, "step": 676100 }, { "loss": 3.0025, "learning_rate": 4.596282612310451e-06, "epoch": 2.7203495178439963, "total_flos": 2595776817563750400, "step": 676200 }, { "loss": 2.9725, "learning_rate": 4.59547148691452e-06, "epoch": 2.720751817388191, "total_flos": 2596161574574100480, "step": 676300 }, { "loss": 3.0025, "learning_rate": 4.594660361518589e-06, "epoch": 2.7211541169323854, "total_flos": 2596552540426629120, "step": 676400 }, { "loss": 2.9675, "learning_rate": 4.593849236122659e-06, "epoch": 2.72155641647658, "total_flos": 2596946071609159680, "step": 676500 }, { "loss": 2.9675, "learning_rate": 4.593038110726728e-06, "epoch": 2.721958716020775, "total_flos": 2597344860921507840, "step": 676600 }, { "loss": 2.955, "learning_rate": 4.5922269853307975e-06, "epoch": 2.722361015564969, "total_flos": 2597731397198008320, "step": 676700 }, { "loss": 3.005, "learning_rate": 4.591415859934867e-06, "epoch": 2.722763315109164, "total_flos": 2598097777310208000, "step": 676800 }, { "loss": 2.9525, "learning_rate": 4.590604734538937e-06, "epoch": 2.7231656146533587, "total_flos": 2598473043130675200, "step": 676900 }, { "loss": 2.9875, "learning_rate": 4.589793609143006e-06, "epoch": 2.7235679141975533, "total_flos": 2598844580459089920, "step": 677000 }, { "loss": 2.9475, "learning_rate": 4.588982483747075e-06, "epoch": 2.723970213741748, "total_flos": 2599243869028208640, "step": 677100 }, { "loss": 2.9475, "learning_rate": 4.588171358351144e-06, "epoch": 2.7243725132859424, "total_flos": 2599626825527439360, "step": 677200 }, { "loss": 2.98, "learning_rate": 4.587360232955214e-06, "epoch": 2.724774812830137, "total_flos": 2600011046102323200, "step": 677300 }, { "loss": 2.9725, "learning_rate": 4.5865491075592836e-06, "epoch": 2.7251771123743316, "total_flos": 2600405002184232960, "step": 677400 }, { "loss": 2.995, "learning_rate": 4.585737982163353e-06, "epoch": 2.725579411918526, "total_flos": 2600811466241617920, "step": 677500 }, { "loss": 2.965, "learning_rate": 4.584926856767422e-06, "epoch": 2.7259817114627207, "total_flos": 2601181298661273600, "step": 677600 }, { "loss": 2.995, "learning_rate": 4.584115731371492e-06, "epoch": 2.7263840110069157, "total_flos": 2601568408551936000, "step": 677700 }, { "loss": 2.965, "learning_rate": 4.583304605975561e-06, "epoch": 2.72678631055111, "total_flos": 2601963952695275520, "step": 677800 }, { "loss": 2.905, "learning_rate": 4.58249348057963e-06, "epoch": 2.727188610095305, "total_flos": 2602352624091156480, "step": 677900 }, { "loss": 2.9875, "learning_rate": 4.5816823551837e-06, "epoch": 2.7275909096394995, "total_flos": 2602729759468892160, "step": 678000 }, { "loss": 2.935, "learning_rate": 4.58087122978777e-06, "epoch": 2.727993209183694, "total_flos": 2603113002775203840, "step": 678100 }, { "loss": 2.9975, "learning_rate": 4.580060104391839e-06, "epoch": 2.7283955087278886, "total_flos": 2603499873659965440, "step": 678200 }, { "loss": 2.975, "learning_rate": 4.579248978995908e-06, "epoch": 2.728797808272083, "total_flos": 2603874385284034560, "step": 678300 }, { "loss": 3.0075, "learning_rate": 4.578437853599977e-06, "epoch": 2.7292001078162778, "total_flos": 2604261080897802240, "step": 678400 }, { "loss": 2.9475, "learning_rate": 4.577626728204047e-06, "epoch": 2.7296024073604723, "total_flos": 2604634259400069120, "step": 678500 }, { "loss": 2.965, "learning_rate": 4.5768156028081165e-06, "epoch": 2.7300047069046673, "total_flos": 2605019775918059520, "step": 678600 }, { "loss": 2.995, "learning_rate": 4.576004477412186e-06, "epoch": 2.7304070064488615, "total_flos": 2605388530155540480, "step": 678700 }, { "loss": 2.9675, "learning_rate": 4.575193352016256e-06, "epoch": 2.7308093059930565, "total_flos": 2605778332846018560, "step": 678800 }, { "loss": 2.975, "learning_rate": 4.574382226620325e-06, "epoch": 2.731211605537251, "total_flos": 2606160391745310720, "step": 678900 }, { "loss": 2.965, "learning_rate": 4.573571101224394e-06, "epoch": 2.7316139050814456, "total_flos": 2606545435562741760, "step": 679000 }, { "loss": 2.9875, "learning_rate": 4.572759975828463e-06, "epoch": 2.73201620462564, "total_flos": 2606923420739235840, "step": 679100 }, { "loss": 3.0125, "learning_rate": 4.571948850432533e-06, "epoch": 2.732418504169835, "total_flos": 2607310668722196480, "step": 679200 }, { "loss": 2.9525, "learning_rate": 4.5711377250366025e-06, "epoch": 2.7328208037140294, "total_flos": 2607696711053168640, "step": 679300 }, { "loss": 3.0275, "learning_rate": 4.570326599640672e-06, "epoch": 2.733223103258224, "total_flos": 2608059527321825280, "step": 679400 }, { "loss": 2.9725, "learning_rate": 4.569515474244741e-06, "epoch": 2.7336254028024185, "total_flos": 2608454444738580480, "step": 679500 }, { "loss": 2.965, "learning_rate": 4.568704348848811e-06, "epoch": 2.734027702346613, "total_flos": 2608834554411970560, "step": 679600 }, { "loss": 2.97, "learning_rate": 4.56789322345288e-06, "epoch": 2.734430001890808, "total_flos": 2609227974058414080, "step": 679700 }, { "loss": 2.965, "learning_rate": 4.567082098056949e-06, "epoch": 2.734832301435002, "total_flos": 2609617426206904320, "step": 679800 }, { "loss": 2.95, "learning_rate": 4.5662709726610185e-06, "epoch": 2.7352346009791972, "total_flos": 2609991943142215680, "step": 679900 }, { "loss": 2.995, "learning_rate": 4.565459847265089e-06, "epoch": 2.735636900523392, "total_flos": 2610366672527216640, "step": 680000 }, { "loss": 2.965, "learning_rate": 4.564648721869158e-06, "epoch": 2.7360392000675864, "total_flos": 2610749565291540480, "step": 680100 }, { "loss": 2.96, "learning_rate": 4.563837596473227e-06, "epoch": 2.736441499611781, "total_flos": 2611126371372257280, "step": 680200 }, { "loss": 2.9325, "learning_rate": 4.563026471077296e-06, "epoch": 2.7368437991559755, "total_flos": 2611502242674339840, "step": 680300 }, { "loss": 2.99, "learning_rate": 4.562215345681366e-06, "epoch": 2.73724609870017, "total_flos": 2611896081908920320, "step": 680400 }, { "loss": 2.9825, "learning_rate": 4.561404220285435e-06, "epoch": 2.7376483982443647, "total_flos": 2612285443766292480, "step": 680500 }, { "loss": 2.9725, "learning_rate": 4.560593094889505e-06, "epoch": 2.7380506977885593, "total_flos": 2612662621633966080, "step": 680600 }, { "loss": 3.0125, "learning_rate": 4.559781969493574e-06, "epoch": 2.738452997332754, "total_flos": 2613061039159357440, "step": 680700 }, { "loss": 2.96, "learning_rate": 4.558970844097644e-06, "epoch": 2.738855296876949, "total_flos": 2613450454129152000, "step": 680800 }, { "loss": 2.9275, "learning_rate": 4.558159718701713e-06, "epoch": 2.7392575964211434, "total_flos": 2613834435698135040, "step": 680900 }, { "loss": 2.945, "learning_rate": 4.557348593305782e-06, "epoch": 2.739659895965338, "total_flos": 2614212436808355840, "step": 681000 }, { "loss": 2.965, "learning_rate": 4.5565374679098514e-06, "epoch": 2.7400621955095326, "total_flos": 2614605086324674560, "step": 681100 }, { "loss": 2.96, "learning_rate": 4.5557263425139215e-06, "epoch": 2.740464495053727, "total_flos": 2614983910677442560, "step": 681200 }, { "loss": 2.9975, "learning_rate": 4.554915217117991e-06, "epoch": 2.7408667945979217, "total_flos": 2615372412113571840, "step": 681300 }, { "loss": 3.01, "learning_rate": 4.55410409172206e-06, "epoch": 2.7412690941421163, "total_flos": 2615749669649879040, "step": 681400 }, { "loss": 2.9675, "learning_rate": 4.553292966326129e-06, "epoch": 2.741671393686311, "total_flos": 2616134931228241920, "step": 681500 }, { "loss": 2.9825, "learning_rate": 4.552481840930199e-06, "epoch": 2.7420736932305054, "total_flos": 2616515136503992320, "step": 681600 }, { "loss": 2.95, "learning_rate": 4.551670715534268e-06, "epoch": 2.7424759927747004, "total_flos": 2616896127843594240, "step": 681700 }, { "loss": 2.9775, "learning_rate": 4.5508595901383375e-06, "epoch": 2.7428782923188946, "total_flos": 2617271834497167360, "step": 681800 }, { "loss": 2.975, "learning_rate": 4.550048464742407e-06, "epoch": 2.7432805918630896, "total_flos": 2617654079289937920, "step": 681900 }, { "loss": 2.9275, "learning_rate": 4.549237339346477e-06, "epoch": 2.743682891407284, "total_flos": 2618036807405752320, "step": 682000 }, { "loss": 2.9475, "learning_rate": 4.548426213950546e-06, "epoch": 2.7440851909514787, "total_flos": 2618418266134671360, "step": 682100 }, { "loss": 2.9675, "learning_rate": 4.547615088554615e-06, "epoch": 2.7444874904956733, "total_flos": 2618798567012782080, "step": 682200 }, { "loss": 2.9975, "learning_rate": 4.546803963158684e-06, "epoch": 2.744889790039868, "total_flos": 2619170592975482880, "step": 682300 }, { "loss": 2.9475, "learning_rate": 4.545992837762754e-06, "epoch": 2.7452920895840625, "total_flos": 2619571522718453760, "step": 682400 }, { "loss": 3.015, "learning_rate": 4.5451817123668236e-06, "epoch": 2.745694389128257, "total_flos": 2619965595647692800, "step": 682500 }, { "loss": 2.9475, "learning_rate": 4.544370586970893e-06, "epoch": 2.7460966886724516, "total_flos": 2620349269164625920, "step": 682600 }, { "loss": 2.9775, "learning_rate": 4.543559461574962e-06, "epoch": 2.746498988216646, "total_flos": 2620727264963604480, "step": 682700 }, { "loss": 2.96, "learning_rate": 4.542748336179032e-06, "epoch": 2.746901287760841, "total_flos": 2621097894069596160, "step": 682800 }, { "loss": 2.955, "learning_rate": 4.541937210783101e-06, "epoch": 2.7473035873050353, "total_flos": 2621470217461862400, "step": 682900 }, { "loss": 2.9275, "learning_rate": 4.541126085387171e-06, "epoch": 2.7477058868492303, "total_flos": 2621856902453145600, "step": 683000 }, { "loss": 2.985, "learning_rate": 4.54031495999124e-06, "epoch": 2.748108186393425, "total_flos": 2622232646285414400, "step": 683100 }, { "loss": 2.935, "learning_rate": 4.53950383459531e-06, "epoch": 2.7485104859376195, "total_flos": 2622618964800983040, "step": 683200 }, { "loss": 2.91, "learning_rate": 4.53869270919938e-06, "epoch": 2.748912785481814, "total_flos": 2623000917475430400, "step": 683300 }, { "loss": 2.9575, "learning_rate": 4.537881583803449e-06, "epoch": 2.7493150850260086, "total_flos": 2623388250438266880, "step": 683400 }, { "loss": 2.98, "learning_rate": 4.537070458407518e-06, "epoch": 2.749717384570203, "total_flos": 2623773442970480640, "step": 683500 }, { "loss": 2.875, "learning_rate": 4.536259333011587e-06, "epoch": 2.7501196841143978, "total_flos": 2624145537979330560, "step": 683600 }, { "loss": 2.975, "learning_rate": 4.535448207615657e-06, "epoch": 2.7505219836585924, "total_flos": 2624517096552714240, "step": 683700 }, { "loss": 2.93, "learning_rate": 4.5346370822197265e-06, "epoch": 2.750924283202787, "total_flos": 2624893769852375040, "step": 683800 }, { "loss": 2.9825, "learning_rate": 4.533825956823796e-06, "epoch": 2.751326582746982, "total_flos": 2625285946668134400, "step": 683900 }, { "loss": 2.9325, "learning_rate": 4.533014831427865e-06, "epoch": 2.7517288822911765, "total_flos": 2625676179569233920, "step": 684000 }, { "loss": 2.9775, "learning_rate": 4.532203706031935e-06, "epoch": 2.752131181835371, "total_flos": 2626057059372748800, "step": 684100 }, { "loss": 2.9525, "learning_rate": 4.531392580636004e-06, "epoch": 2.7525334813795657, "total_flos": 2626445709523660800, "step": 684200 }, { "loss": 2.94, "learning_rate": 4.530581455240073e-06, "epoch": 2.7529357809237602, "total_flos": 2626811691292692480, "step": 684300 }, { "loss": 2.95, "learning_rate": 4.5297703298441425e-06, "epoch": 2.753338080467955, "total_flos": 2627177216294891520, "step": 684400 }, { "loss": 2.9475, "learning_rate": 4.5289592044482126e-06, "epoch": 2.7537403800121494, "total_flos": 2627548817358213120, "step": 684500 }, { "loss": 2.91, "learning_rate": 4.528148079052282e-06, "epoch": 2.754142679556344, "total_flos": 2627920753029795840, "step": 684600 }, { "loss": 2.965, "learning_rate": 4.527336953656351e-06, "epoch": 2.7545449791005385, "total_flos": 2628316143147110400, "step": 684700 }, { "loss": 2.975, "learning_rate": 4.52652582826042e-06, "epoch": 2.7549472786447335, "total_flos": 2628710189520138240, "step": 684800 }, { "loss": 2.995, "learning_rate": 4.52571470286449e-06, "epoch": 2.7553495781889277, "total_flos": 2629116021539696640, "step": 684900 }, { "loss": 2.985, "learning_rate": 4.524903577468559e-06, "epoch": 2.7557518777331227, "total_flos": 2629488583937863680, "step": 685000 }, { "loss": 2.9725, "learning_rate": 4.524092452072629e-06, "epoch": 2.7561541772773173, "total_flos": 2629869123821875200, "step": 685100 }, { "loss": 2.9925, "learning_rate": 4.523281326676698e-06, "epoch": 2.756556476821512, "total_flos": 2630251044628869120, "step": 685200 }, { "loss": 3.0475, "learning_rate": 4.522470201280768e-06, "epoch": 2.7569587763657064, "total_flos": 2630633188508037120, "step": 685300 }, { "loss": 3.0, "learning_rate": 4.521659075884837e-06, "epoch": 2.757361075909901, "total_flos": 2631040751992565760, "step": 685400 }, { "loss": 2.9675, "learning_rate": 4.520847950488906e-06, "epoch": 2.7577633754540956, "total_flos": 2631420558925148160, "step": 685500 }, { "loss": 2.9625, "learning_rate": 4.520036825092975e-06, "epoch": 2.75816567499829, "total_flos": 2631813070349168640, "step": 685600 }, { "loss": 2.9475, "learning_rate": 4.5192256996970455e-06, "epoch": 2.7585679745424847, "total_flos": 2632200536093061120, "step": 685700 }, { "loss": 2.955, "learning_rate": 4.518414574301115e-06, "epoch": 2.7589702740866793, "total_flos": 2632572747949240320, "step": 685800 }, { "loss": 2.94, "learning_rate": 4.517603448905184e-06, "epoch": 2.7593725736308743, "total_flos": 2632952124671201280, "step": 685900 }, { "loss": 2.9625, "learning_rate": 4.516792323509253e-06, "epoch": 2.7597748731750684, "total_flos": 2633322212030484480, "step": 686000 }, { "loss": 3.005, "learning_rate": 4.515981198113323e-06, "epoch": 2.7601771727192634, "total_flos": 2633695167460577280, "step": 686100 }, { "loss": 2.9475, "learning_rate": 4.515170072717392e-06, "epoch": 2.760579472263458, "total_flos": 2634080540575027200, "step": 686200 }, { "loss": 2.9375, "learning_rate": 4.5143589473214615e-06, "epoch": 2.7609817718076526, "total_flos": 2634450415484620800, "step": 686300 }, { "loss": 2.985, "learning_rate": 4.513547821925531e-06, "epoch": 2.761384071351847, "total_flos": 2634830418933166080, "step": 686400 }, { "loss": 2.985, "learning_rate": 4.512736696529601e-06, "epoch": 2.7617863708960417, "total_flos": 2635197871916298240, "step": 686500 }, { "loss": 3.0075, "learning_rate": 4.51192557113367e-06, "epoch": 2.7621886704402363, "total_flos": 2635583133494661120, "step": 686600 }, { "loss": 2.95, "learning_rate": 4.511114445737739e-06, "epoch": 2.762590969984431, "total_flos": 2635960497255813120, "step": 686700 }, { "loss": 2.9875, "learning_rate": 4.510303320341808e-06, "epoch": 2.762993269528626, "total_flos": 2636347012287344640, "step": 686800 }, { "loss": 2.975, "learning_rate": 4.509492194945878e-06, "epoch": 2.76339556907282, "total_flos": 2636729097742848000, "step": 686900 }, { "loss": 2.92, "learning_rate": 4.5086810695499475e-06, "epoch": 2.763797868617015, "total_flos": 2637106052538347520, "step": 687000 }, { "loss": 2.9425, "learning_rate": 4.507869944154017e-06, "epoch": 2.7642001681612096, "total_flos": 2637496195148328960, "step": 687100 }, { "loss": 2.915, "learning_rate": 4.507058818758086e-06, "epoch": 2.764602467705404, "total_flos": 2637900109809438720, "step": 687200 }, { "loss": 2.95, "learning_rate": 4.506247693362156e-06, "epoch": 2.7650047672495988, "total_flos": 2638290034658488320, "step": 687300 }, { "loss": 2.975, "learning_rate": 4.505436567966225e-06, "epoch": 2.7654070667937933, "total_flos": 2638648591310868480, "step": 687400 }, { "loss": 2.99, "learning_rate": 4.504625442570294e-06, "epoch": 2.765809366337988, "total_flos": 2639025530172641280, "step": 687500 }, { "loss": 2.9325, "learning_rate": 4.5038143171743636e-06, "epoch": 2.7662116658821825, "total_flos": 2639410855485911040, "step": 687600 }, { "loss": 2.98, "learning_rate": 4.503003191778434e-06, "epoch": 2.766613965426377, "total_flos": 2639782254722027520, "step": 687700 }, { "loss": 2.9525, "learning_rate": 4.502192066382503e-06, "epoch": 2.7670162649705716, "total_flos": 2640165163420078080, "step": 687800 }, { "loss": 2.9625, "learning_rate": 4.501380940986572e-06, "epoch": 2.7674185645147666, "total_flos": 2640551981192417280, "step": 687900 }, { "loss": 2.8975, "learning_rate": 4.500569815590642e-06, "epoch": 2.7678208640589608, "total_flos": 2640946707404451840, "step": 688000 }, { "loss": 2.97, "learning_rate": 4.499758690194711e-06, "epoch": 2.768223163603156, "total_flos": 2641309852970127360, "step": 688100 }, { "loss": 3.0, "learning_rate": 4.4989475647987804e-06, "epoch": 2.7686254631473504, "total_flos": 2641693037852774400, "step": 688200 }, { "loss": 2.9375, "learning_rate": 4.49813643940285e-06, "epoch": 2.769027762691545, "total_flos": 2642076222735421440, "step": 688300 }, { "loss": 2.945, "learning_rate": 4.49732531400692e-06, "epoch": 2.7694300622357395, "total_flos": 2642462344735027200, "step": 688400 }, { "loss": 3.0025, "learning_rate": 4.496514188610989e-06, "epoch": 2.769832361779934, "total_flos": 2642825001666416640, "step": 688500 }, { "loss": 2.9375, "learning_rate": 4.495703063215058e-06, "epoch": 2.7702346613241287, "total_flos": 2643210443827015680, "step": 688600 }, { "loss": 2.935, "learning_rate": 4.494891937819127e-06, "epoch": 2.7706369608683232, "total_flos": 2643590691592704000, "step": 688700 }, { "loss": 2.9675, "learning_rate": 4.494080812423197e-06, "epoch": 2.771039260412518, "total_flos": 2643975857568706560, "step": 688800 }, { "loss": 2.98, "learning_rate": 4.4932696870272665e-06, "epoch": 2.7714415599567124, "total_flos": 2644350517907558400, "step": 688900 }, { "loss": 3.0025, "learning_rate": 4.492458561631336e-06, "epoch": 2.7718438595009074, "total_flos": 2644727249630883840, "step": 689000 }, { "loss": 2.995, "learning_rate": 4.491647436235405e-06, "epoch": 2.772246159045102, "total_flos": 2645119144950804480, "step": 689100 }, { "loss": 2.925, "learning_rate": 4.490836310839475e-06, "epoch": 2.7726484585892965, "total_flos": 2645500072555499520, "step": 689200 }, { "loss": 2.975, "learning_rate": 4.490025185443544e-06, "epoch": 2.773050758133491, "total_flos": 2645881021405163520, "step": 689300 }, { "loss": 2.9725, "learning_rate": 4.489214060047613e-06, "epoch": 2.7734530576776857, "total_flos": 2646264960484208640, "step": 689400 }, { "loss": 2.9625, "learning_rate": 4.4884029346516825e-06, "epoch": 2.7738553572218803, "total_flos": 2646669209753579520, "step": 689500 }, { "loss": 2.92, "learning_rate": 4.4875918092557526e-06, "epoch": 2.774257656766075, "total_flos": 2647035876672860160, "step": 689600 }, { "loss": 2.97, "learning_rate": 4.486780683859822e-06, "epoch": 2.7746599563102694, "total_flos": 2647404705267732480, "step": 689700 }, { "loss": 2.9475, "learning_rate": 4.485969558463891e-06, "epoch": 2.775062255854464, "total_flos": 2647781569772113920, "step": 689800 }, { "loss": 2.9825, "learning_rate": 4.48515843306796e-06, "epoch": 2.775464555398659, "total_flos": 2648170942251970560, "step": 689900 }, { "loss": 2.9525, "learning_rate": 4.48434730767203e-06, "epoch": 2.775866854942853, "total_flos": 2648571893239910400, "step": 690000 }, { "loss": 2.9975, "learning_rate": 4.483536182276099e-06, "epoch": 2.776269154487048, "total_flos": 2648973412530769920, "step": 690100 }, { "loss": 3.015, "learning_rate": 4.482725056880169e-06, "epoch": 2.7766714540312427, "total_flos": 2649355115576832000, "step": 690200 }, { "loss": 2.9425, "learning_rate": 4.481913931484238e-06, "epoch": 2.7770737535754373, "total_flos": 2649740584293642240, "step": 690300 }, { "loss": 2.96, "learning_rate": 4.481102806088308e-06, "epoch": 2.777476053119632, "total_flos": 2650116285635973120, "step": 690400 }, { "loss": 2.9975, "learning_rate": 4.480291680692377e-06, "epoch": 2.7778783526638264, "total_flos": 2650498965950607360, "step": 690500 }, { "loss": 2.9925, "learning_rate": 4.479480555296446e-06, "epoch": 2.778280652208021, "total_flos": 2650859631166156800, "step": 690600 }, { "loss": 2.95, "learning_rate": 4.478669429900515e-06, "epoch": 2.7786829517522156, "total_flos": 2651234599557058560, "step": 690700 }, { "loss": 2.995, "learning_rate": 4.4778583045045855e-06, "epoch": 2.77908525129641, "total_flos": 2651628587506421760, "step": 690800 }, { "loss": 2.995, "learning_rate": 4.477047179108655e-06, "epoch": 2.7794875508406047, "total_flos": 2652020355356528640, "step": 690900 }, { "loss": 2.92, "learning_rate": 4.476236053712724e-06, "epoch": 2.7798898503847997, "total_flos": 2652390421470842880, "step": 691000 }, { "loss": 2.975, "learning_rate": 4.475424928316793e-06, "epoch": 2.780292149928994, "total_flos": 2652774711091875840, "step": 691100 }, { "loss": 3.0225, "learning_rate": 4.474613802920863e-06, "epoch": 2.780694449473189, "total_flos": 2653151283477934080, "step": 691200 }, { "loss": 2.9425, "learning_rate": 4.473802677524932e-06, "epoch": 2.7810967490173835, "total_flos": 2653532078301573120, "step": 691300 }, { "loss": 2.935, "learning_rate": 4.4729915521290015e-06, "epoch": 2.781499048561578, "total_flos": 2653919140391055360, "step": 691400 }, { "loss": 2.955, "learning_rate": 4.472180426733071e-06, "epoch": 2.7819013481057726, "total_flos": 2654279216058716160, "step": 691500 }, { "loss": 2.9475, "learning_rate": 4.471369301337141e-06, "epoch": 2.782303647649967, "total_flos": 2654655374167879680, "step": 691600 }, { "loss": 2.985, "learning_rate": 4.47055817594121e-06, "epoch": 2.7827059471941618, "total_flos": 2655035945919344640, "step": 691700 }, { "loss": 2.965, "learning_rate": 4.469747050545279e-06, "epoch": 2.7831082467383563, "total_flos": 2655416857590312960, "step": 691800 }, { "loss": 2.9525, "learning_rate": 4.468935925149348e-06, "epoch": 2.783510546282551, "total_flos": 2655795735055503360, "step": 691900 }, { "loss": 2.935, "learning_rate": 4.468124799753418e-06, "epoch": 2.7839128458267455, "total_flos": 2656183891260887040, "step": 692000 }, { "loss": 2.9825, "learning_rate": 4.4673136743574875e-06, "epoch": 2.7843151453709405, "total_flos": 2656574660597452800, "step": 692100 }, { "loss": 2.955, "learning_rate": 4.466502548961557e-06, "epoch": 2.784717444915135, "total_flos": 2656963905607495680, "step": 692200 }, { "loss": 2.95, "learning_rate": 4.465691423565626e-06, "epoch": 2.7851197444593296, "total_flos": 2657357601438535680, "step": 692300 }, { "loss": 2.9325, "learning_rate": 4.464880298169696e-06, "epoch": 2.785522044003524, "total_flos": 2657743155135221760, "step": 692400 }, { "loss": 2.9575, "learning_rate": 4.464069172773765e-06, "epoch": 2.785924343547719, "total_flos": 2658127476623708160, "step": 692500 }, { "loss": 2.97, "learning_rate": 4.463258047377834e-06, "epoch": 2.7863266430919134, "total_flos": 2658515537226731520, "step": 692600 }, { "loss": 2.9675, "learning_rate": 4.462446921981904e-06, "epoch": 2.786728942636108, "total_flos": 2658896332050370560, "step": 692700 }, { "loss": 2.93, "learning_rate": 4.461635796585974e-06, "epoch": 2.7871312421803025, "total_flos": 2659282958617989120, "step": 692800 }, { "loss": 2.95, "learning_rate": 4.460824671190044e-06, "epoch": 2.787533541724497, "total_flos": 2659659228263239680, "step": 692900 }, { "loss": 2.985, "learning_rate": 4.460013545794112e-06, "epoch": 2.787935841268692, "total_flos": 2660053184345149440, "step": 693000 }, { "loss": 3.04, "learning_rate": 4.459202420398182e-06, "epoch": 2.7883381408128862, "total_flos": 2660441531755253760, "step": 693100 }, { "loss": 2.945, "learning_rate": 4.458391295002251e-06, "epoch": 2.7887404403570812, "total_flos": 2660826490592808960, "step": 693200 }, { "loss": 3.0025, "learning_rate": 4.457580169606321e-06, "epoch": 2.789142739901276, "total_flos": 2661206733047255040, "step": 693300 }, { "loss": 2.9475, "learning_rate": 4.45676904421039e-06, "epoch": 2.7895450394454704, "total_flos": 2661608671926251520, "step": 693400 }, { "loss": 2.965, "learning_rate": 4.45595791881446e-06, "epoch": 2.789947338989665, "total_flos": 2661973309950996480, "step": 693500 }, { "loss": 2.9725, "learning_rate": 4.455146793418529e-06, "epoch": 2.7903496385338595, "total_flos": 2662353424935628800, "step": 693600 }, { "loss": 3.01, "learning_rate": 4.454335668022599e-06, "epoch": 2.790751938078054, "total_flos": 2662724107154042880, "step": 693700 }, { "loss": 2.94, "learning_rate": 4.453524542626667e-06, "epoch": 2.7911542376222487, "total_flos": 2663107265480478720, "step": 693800 }, { "loss": 2.9525, "learning_rate": 4.452713417230737e-06, "epoch": 2.7915565371664433, "total_flos": 2663500254916300800, "step": 693900 }, { "loss": 3.0, "learning_rate": 4.4519022918348065e-06, "epoch": 2.791958836710638, "total_flos": 2663878548144844800, "step": 694000 }, { "loss": 2.98, "learning_rate": 4.4510911664388765e-06, "epoch": 2.792361136254833, "total_flos": 2664265843928985600, "step": 694100 }, { "loss": 3.03, "learning_rate": 4.450280041042945e-06, "epoch": 2.792763435799027, "total_flos": 2664653973578158080, "step": 694200 }, { "loss": 2.97, "learning_rate": 4.449468915647015e-06, "epoch": 2.793165735343222, "total_flos": 2665049491165286400, "step": 694300 }, { "loss": 2.9675, "learning_rate": 4.448657790251084e-06, "epoch": 2.7935680348874166, "total_flos": 2665449337414840320, "step": 694400 }, { "loss": 3.0275, "learning_rate": 4.447846664855154e-06, "epoch": 2.793970334431611, "total_flos": 2665834843310346240, "step": 694500 }, { "loss": 2.9475, "learning_rate": 4.447035539459223e-06, "epoch": 2.7943726339758057, "total_flos": 2666215016718643200, "step": 694600 }, { "loss": 3.0025, "learning_rate": 4.4462244140632926e-06, "epoch": 2.7947749335200003, "total_flos": 2666604357331046400, "step": 694700 }, { "loss": 2.96, "learning_rate": 4.445413288667362e-06, "epoch": 2.795177233064195, "total_flos": 2667002434936934400, "step": 694800 }, { "loss": 2.91, "learning_rate": 4.444602163271432e-06, "epoch": 2.7955795326083894, "total_flos": 2667377387394109440, "step": 694900 }, { "loss": 2.905, "learning_rate": 4.443791037875501e-06, "epoch": 2.7959818321525844, "total_flos": 2667737144387235840, "step": 695000 }, { "loss": 2.975, "learning_rate": 4.44297991247957e-06, "epoch": 2.7963841316967786, "total_flos": 2668115485416960000, "step": 695100 }, { "loss": 2.925, "learning_rate": 4.442168787083639e-06, "epoch": 2.7967864312409736, "total_flos": 2668512660111667200, "step": 695200 }, { "loss": 3.02, "learning_rate": 4.4413576616877094e-06, "epoch": 2.797188730785168, "total_flos": 2668904018996121600, "step": 695300 }, { "loss": 2.99, "learning_rate": 4.440546536291779e-06, "epoch": 2.7975910303293627, "total_flos": 2669287740314234880, "step": 695400 }, { "loss": 2.9125, "learning_rate": 4.439735410895848e-06, "epoch": 2.7979933298735573, "total_flos": 2669672146782597120, "step": 695500 }, { "loss": 3.0225, "learning_rate": 4.438924285499917e-06, "epoch": 2.798395629417752, "total_flos": 2670063866831523840, "step": 695600 }, { "loss": 2.97, "learning_rate": 4.438113160103987e-06, "epoch": 2.7987979289619465, "total_flos": 2670454073176412160, "step": 695700 }, { "loss": 2.9625, "learning_rate": 4.437302034708056e-06, "epoch": 2.799200228506141, "total_flos": 2670821903257743360, "step": 695800 }, { "loss": 2.95, "learning_rate": 4.4364909093121255e-06, "epoch": 2.7996025280503356, "total_flos": 2671216539178659840, "step": 695900 }, { "loss": 2.98, "learning_rate": 4.435679783916195e-06, "epoch": 2.80000482759453, "total_flos": 2671605916969758720, "step": 696000 }, { "loss": 2.9775, "learning_rate": 4.434868658520265e-06, "epoch": 2.800407127138725, "total_flos": 2671988257364889600, "step": 696100 }, { "loss": 2.9975, "learning_rate": 4.434057533124334e-06, "epoch": 2.8008094266829193, "total_flos": 2672381007794810880, "step": 696200 }, { "loss": 2.905, "learning_rate": 4.433246407728403e-06, "epoch": 2.8012117262271143, "total_flos": 2672765467375595520, "step": 696300 }, { "loss": 2.97, "learning_rate": 4.432435282332472e-06, "epoch": 2.801614025771309, "total_flos": 2673152853450854400, "step": 696400 }, { "loss": 3.0075, "learning_rate": 4.431624156936542e-06, "epoch": 2.8020163253155035, "total_flos": 2673548700335001600, "step": 696500 }, { "loss": 2.91, "learning_rate": 4.4308130315406115e-06, "epoch": 2.802418624859698, "total_flos": 2673940261046661120, "step": 696600 }, { "loss": 2.9275, "learning_rate": 4.430001906144681e-06, "epoch": 2.8028209244038926, "total_flos": 2674345227333734400, "step": 696700 }, { "loss": 2.9375, "learning_rate": 4.42919078074875e-06, "epoch": 2.803223223948087, "total_flos": 2674732719633838080, "step": 696800 }, { "loss": 2.99, "learning_rate": 4.42837965535282e-06, "epoch": 2.803625523492282, "total_flos": 2675112823995985920, "step": 696900 }, { "loss": 2.985, "learning_rate": 4.427568529956889e-06, "epoch": 2.8040278230364764, "total_flos": 2675510073048084480, "step": 697000 }, { "loss": 2.98, "learning_rate": 4.426757404560958e-06, "epoch": 2.804430122580671, "total_flos": 2675891908875202560, "step": 697100 }, { "loss": 2.97, "learning_rate": 4.4259462791650275e-06, "epoch": 2.804832422124866, "total_flos": 2676280702429655040, "step": 697200 }, { "loss": 2.915, "learning_rate": 4.425135153769098e-06, "epoch": 2.80523472166906, "total_flos": 2676668725853982720, "step": 697300 }, { "loss": 2.9175, "learning_rate": 4.424324028373167e-06, "epoch": 2.805637021213255, "total_flos": 2677044459063767040, "step": 697400 }, { "loss": 2.9975, "learning_rate": 4.423512902977236e-06, "epoch": 2.8060393207574497, "total_flos": 2677422816027217920, "step": 697500 }, { "loss": 3.0075, "learning_rate": 4.422701777581306e-06, "epoch": 2.8064416203016442, "total_flos": 2677816049780183040, "step": 697600 }, { "loss": 2.985, "learning_rate": 4.421890652185375e-06, "epoch": 2.806843919845839, "total_flos": 2678186190251888640, "step": 697700 }, { "loss": 2.9575, "learning_rate": 4.421079526789444e-06, "epoch": 2.8072462193900334, "total_flos": 2678558396796825600, "step": 697800 }, { "loss": 2.9775, "learning_rate": 4.420268401393514e-06, "epoch": 2.807648518934228, "total_flos": 2678937598247792640, "step": 697900 }, { "loss": 2.9625, "learning_rate": 4.419457275997584e-06, "epoch": 2.8080508184784225, "total_flos": 2679314324659875840, "step": 698000 }, { "loss": 2.97, "learning_rate": 4.418646150601653e-06, "epoch": 2.8084531180226175, "total_flos": 2679691252899164160, "step": 698100 }, { "loss": 2.9225, "learning_rate": 4.417835025205722e-06, "epoch": 2.8088554175668117, "total_flos": 2680064399533977600, "step": 698200 }, { "loss": 2.9775, "learning_rate": 4.417023899809791e-06, "epoch": 2.8092577171110067, "total_flos": 2680458169722408960, "step": 698300 }, { "loss": 2.9375, "learning_rate": 4.416212774413861e-06, "epoch": 2.8096600166552013, "total_flos": 2680843755286548480, "step": 698400 }, { "loss": 2.955, "learning_rate": 4.4154016490179305e-06, "epoch": 2.810062316199396, "total_flos": 2681222887691366400, "step": 698500 }, { "loss": 2.9225, "learning_rate": 4.414590523622e-06, "epoch": 2.8104646157435904, "total_flos": 2681593437128724480, "step": 698600 }, { "loss": 2.95, "learning_rate": 4.413779398226069e-06, "epoch": 2.810866915287785, "total_flos": 2681983239819202560, "step": 698700 }, { "loss": 3.03, "learning_rate": 4.412968272830139e-06, "epoch": 2.8112692148319796, "total_flos": 2682369744228249600, "step": 698800 }, { "loss": 2.9175, "learning_rate": 4.412157147434208e-06, "epoch": 2.811671514376174, "total_flos": 2682754618085928960, "step": 698900 }, { "loss": 2.935, "learning_rate": 4.411346022038277e-06, "epoch": 2.8120738139203687, "total_flos": 2683155457537781760, "step": 699000 }, { "loss": 2.975, "learning_rate": 4.4105348966423465e-06, "epoch": 2.8124761134645633, "total_flos": 2683539455040491520, "step": 699100 }, { "loss": 2.905, "learning_rate": 4.4097237712464165e-06, "epoch": 2.8128784130087583, "total_flos": 2683919214171893760, "step": 699200 }, { "loss": 2.965, "learning_rate": 4.408912645850486e-06, "epoch": 2.8132807125529524, "total_flos": 2684311672483491840, "step": 699300 }, { "loss": 2.9675, "learning_rate": 4.408101520454555e-06, "epoch": 2.8136830120971474, "total_flos": 2684695468158996480, "step": 699400 }, { "loss": 2.925, "learning_rate": 4.407290395058624e-06, "epoch": 2.814085311641342, "total_flos": 2685092154219417600, "step": 699500 }, { "loss": 2.9175, "learning_rate": 4.406479269662694e-06, "epoch": 2.8144876111855366, "total_flos": 2685474260919889920, "step": 699600 }, { "loss": 2.9525, "learning_rate": 4.405668144266763e-06, "epoch": 2.814889910729731, "total_flos": 2685852543525949440, "step": 699700 }, { "loss": 2.9525, "learning_rate": 4.4048570188708326e-06, "epoch": 2.8152922102739257, "total_flos": 2686233348972072960, "step": 699800 }, { "loss": 2.9875, "learning_rate": 4.404045893474902e-06, "epoch": 2.8156945098181203, "total_flos": 2686627156339200000, "step": 699900 }, { "loss": 2.9475, "learning_rate": 4.403234768078972e-06, "epoch": 2.816096809362315, "total_flos": 2687026938853847040, "step": 700000 }, { "loss": 3.0025, "learning_rate": 4.402423642683041e-06, "epoch": 2.8164991089065095, "total_flos": 2687404902785372160, "step": 700100 }, { "loss": 2.9625, "learning_rate": 4.40161251728711e-06, "epoch": 2.816901408450704, "total_flos": 2687792209191997440, "step": 700200 }, { "loss": 2.965, "learning_rate": 4.400801391891179e-06, "epoch": 2.817303707994899, "total_flos": 2688167012934389760, "step": 700300 }, { "loss": 2.9125, "learning_rate": 4.3999902664952494e-06, "epoch": 2.8177060075390936, "total_flos": 2688543144487342080, "step": 700400 }, { "loss": 3.005, "learning_rate": 4.399179141099319e-06, "epoch": 2.818108307083288, "total_flos": 2688923264783216640, "step": 700500 }, { "loss": 2.935, "learning_rate": 4.398368015703388e-06, "epoch": 2.8185106066274828, "total_flos": 2689303416946544640, "step": 700600 }, { "loss": 2.9525, "learning_rate": 4.397556890307457e-06, "epoch": 2.8189129061716773, "total_flos": 2689677100016824320, "step": 700700 }, { "loss": 2.945, "learning_rate": 4.396745764911527e-06, "epoch": 2.819315205715872, "total_flos": 2690043310169272320, "step": 700800 }, { "loss": 2.905, "learning_rate": 4.395934639515596e-06, "epoch": 2.8197175052600665, "total_flos": 2690441658648514560, "step": 700900 }, { "loss": 2.97, "learning_rate": 4.3951235141196655e-06, "epoch": 2.820119804804261, "total_flos": 2690818788715008000, "step": 701000 }, { "loss": 2.93, "learning_rate": 4.394312388723735e-06, "epoch": 2.8205221043484556, "total_flos": 2691191032438640640, "step": 701100 }, { "loss": 2.94, "learning_rate": 4.393501263327805e-06, "epoch": 2.8209244038926506, "total_flos": 2691582099204771840, "step": 701200 }, { "loss": 3.0175, "learning_rate": 4.392690137931874e-06, "epoch": 2.8213267034368448, "total_flos": 2691970112006615040, "step": 701300 }, { "loss": 2.94, "learning_rate": 4.391879012535943e-06, "epoch": 2.82172900298104, "total_flos": 2692363186422312960, "step": 701400 }, { "loss": 2.945, "learning_rate": 4.391067887140012e-06, "epoch": 2.8221313025252344, "total_flos": 2692760308004597760, "step": 701500 }, { "loss": 2.98, "learning_rate": 4.390256761744082e-06, "epoch": 2.822533602069429, "total_flos": 2693130475032514560, "step": 701600 }, { "loss": 3.015, "learning_rate": 4.3894456363481515e-06, "epoch": 2.8229359016136235, "total_flos": 2693522715583180800, "step": 701700 }, { "loss": 2.9625, "learning_rate": 4.388634510952221e-06, "epoch": 2.823338201157818, "total_flos": 2693905082534522880, "step": 701800 }, { "loss": 2.905, "learning_rate": 4.38782338555629e-06, "epoch": 2.8237405007020127, "total_flos": 2694262603494666240, "step": 701900 }, { "loss": 2.99, "learning_rate": 4.38701226016036e-06, "epoch": 2.8241428002462072, "total_flos": 2694649994881167360, "step": 702000 }, { "loss": 2.9775, "learning_rate": 4.386201134764429e-06, "epoch": 2.824545099790402, "total_flos": 2695034082674995200, "step": 702100 }, { "loss": 2.9575, "learning_rate": 4.385390009368498e-06, "epoch": 2.8249473993345964, "total_flos": 2695432032811069440, "step": 702200 }, { "loss": 2.965, "learning_rate": 4.384578883972568e-06, "epoch": 2.8253496988787914, "total_flos": 2695821851435274240, "step": 702300 }, { "loss": 2.9325, "learning_rate": 4.383767758576638e-06, "epoch": 2.8257519984229855, "total_flos": 2696208297420656640, "step": 702400 }, { "loss": 2.9675, "learning_rate": 4.382956633180707e-06, "epoch": 2.8261542979671805, "total_flos": 2696605865147289600, "step": 702500 }, { "loss": 2.9275, "learning_rate": 4.382145507784776e-06, "epoch": 2.826556597511375, "total_flos": 2696976504875765760, "step": 702600 }, { "loss": 2.9825, "learning_rate": 4.381334382388846e-06, "epoch": 2.8269588970555697, "total_flos": 2697380355801968640, "step": 702700 }, { "loss": 2.92, "learning_rate": 4.380523256992915e-06, "epoch": 2.8273611965997643, "total_flos": 2697757443378524160, "step": 702800 }, { "loss": 3.005, "learning_rate": 4.379712131596984e-06, "epoch": 2.827763496143959, "total_flos": 2698141525861109760, "step": 702900 }, { "loss": 2.9625, "learning_rate": 4.378901006201054e-06, "epoch": 2.8281657956881534, "total_flos": 2698543974619361280, "step": 703000 }, { "loss": 2.9175, "learning_rate": 4.378089880805124e-06, "epoch": 2.828568095232348, "total_flos": 2698943443770716160, "step": 703100 }, { "loss": 2.97, "learning_rate": 4.377278755409193e-06, "epoch": 2.8289703947765426, "total_flos": 2699334616761692160, "step": 703200 }, { "loss": 2.975, "learning_rate": 4.376467630013263e-06, "epoch": 2.829372694320737, "total_flos": 2699716967779307520, "step": 703300 }, { "loss": 2.9275, "learning_rate": 4.375656504617331e-06, "epoch": 2.829774993864932, "total_flos": 2700107317527736320, "step": 703400 }, { "loss": 2.9875, "learning_rate": 4.374845379221401e-06, "epoch": 2.8301772934091267, "total_flos": 2700504354130145280, "step": 703500 }, { "loss": 2.94, "learning_rate": 4.3740342538254705e-06, "epoch": 2.8305795929533213, "total_flos": 2700903440872058880, "step": 703600 }, { "loss": 3.035, "learning_rate": 4.3732231284295405e-06, "epoch": 2.830981892497516, "total_flos": 2701284352543027200, "step": 703700 }, { "loss": 2.99, "learning_rate": 4.372412003033609e-06, "epoch": 2.8313841920417104, "total_flos": 2701698873754890240, "step": 703800 }, { "loss": 3.0025, "learning_rate": 4.371600877637679e-06, "epoch": 2.831786491585905, "total_flos": 2702090280440524800, "step": 703900 }, { "loss": 2.895, "learning_rate": 4.370789752241748e-06, "epoch": 2.8321887911300996, "total_flos": 2702462476362977280, "step": 704000 }, { "loss": 2.96, "learning_rate": 4.369978626845818e-06, "epoch": 2.832591090674294, "total_flos": 2702861956136816640, "step": 704100 }, { "loss": 2.93, "learning_rate": 4.3691675014498865e-06, "epoch": 2.8329933902184887, "total_flos": 2703249432503193600, "step": 704200 }, { "loss": 2.975, "learning_rate": 4.3683563760539565e-06, "epoch": 2.8333956897626837, "total_flos": 2703652954132377600, "step": 704300 }, { "loss": 2.97, "learning_rate": 4.367545250658026e-06, "epoch": 2.833797989306878, "total_flos": 2704025967986135040, "step": 704400 }, { "loss": 2.95, "learning_rate": 4.366734125262096e-06, "epoch": 2.834200288851073, "total_flos": 2704408802326794240, "step": 704500 }, { "loss": 2.87, "learning_rate": 4.365922999866164e-06, "epoch": 2.8346025883952675, "total_flos": 2704767523627683840, "step": 704600 }, { "loss": 2.9475, "learning_rate": 4.365111874470234e-06, "epoch": 2.835004887939462, "total_flos": 2705145322910699520, "step": 704700 }, { "loss": 2.8825, "learning_rate": 4.364300749074303e-06, "epoch": 2.8354071874836566, "total_flos": 2705544919531868160, "step": 704800 }, { "loss": 2.9775, "learning_rate": 4.363489623678373e-06, "epoch": 2.835809487027851, "total_flos": 2705932342785822720, "step": 704900 }, { "loss": 2.945, "learning_rate": 4.362678498282442e-06, "epoch": 2.8362117865720458, "total_flos": 2706331923473264640, "step": 705000 }, { "loss": 2.9925, "learning_rate": 4.361867372886512e-06, "epoch": 2.8366140861162403, "total_flos": 2706715559811502080, "step": 705100 }, { "loss": 2.9675, "learning_rate": 4.361056247490581e-06, "epoch": 2.837016385660435, "total_flos": 2707096551151104000, "step": 705200 }, { "loss": 2.9525, "learning_rate": 4.360245122094651e-06, "epoch": 2.8374186852046295, "total_flos": 2707469888990638080, "step": 705300 }, { "loss": 2.9775, "learning_rate": 4.359433996698719e-06, "epoch": 2.8378209847488245, "total_flos": 2707860350275153920, "step": 705400 }, { "loss": 2.9375, "learning_rate": 4.3586228713027894e-06, "epoch": 2.8382232842930186, "total_flos": 2708253939881349120, "step": 705500 }, { "loss": 2.9425, "learning_rate": 4.357811745906859e-06, "epoch": 2.8386255838372136, "total_flos": 2708646355703009280, "step": 705600 }, { "loss": 2.9775, "learning_rate": 4.357000620510929e-06, "epoch": 2.839027883381408, "total_flos": 2709035568845598720, "step": 705700 }, { "loss": 2.94, "learning_rate": 4.356189495114998e-06, "epoch": 2.839430182925603, "total_flos": 2709406707830845440, "step": 705800 }, { "loss": 2.935, "learning_rate": 4.355378369719067e-06, "epoch": 2.8398324824697974, "total_flos": 2709795543875235840, "step": 705900 }, { "loss": 2.9425, "learning_rate": 4.354567244323136e-06, "epoch": 2.840234782013992, "total_flos": 2710188299616399360, "step": 706000 }, { "loss": 2.9475, "learning_rate": 4.353756118927206e-06, "epoch": 2.8406370815581865, "total_flos": 2710565716489973760, "step": 706100 }, { "loss": 2.99, "learning_rate": 4.3529449935312755e-06, "epoch": 2.841039381102381, "total_flos": 2710961037561139200, "step": 706200 }, { "loss": 2.9775, "learning_rate": 4.352133868135345e-06, "epoch": 2.841441680646576, "total_flos": 2711348131518074880, "step": 706300 }, { "loss": 2.97, "learning_rate": 4.351322742739414e-06, "epoch": 2.8418439801907702, "total_flos": 2711733520566251520, "step": 706400 }, { "loss": 2.95, "learning_rate": 4.350511617343484e-06, "epoch": 2.8422462797349652, "total_flos": 2712122128227225600, "step": 706500 }, { "loss": 2.985, "learning_rate": 4.349700491947553e-06, "epoch": 2.84264857927916, "total_flos": 2712503480731299840, "step": 706600 }, { "loss": 3.025, "learning_rate": 4.348889366551622e-06, "epoch": 2.8430508788233544, "total_flos": 2712891206726062080, "step": 706700 }, { "loss": 2.905, "learning_rate": 4.3480782411556915e-06, "epoch": 2.843453178367549, "total_flos": 2713293528014499840, "step": 706800 }, { "loss": 2.96, "learning_rate": 4.3472671157597616e-06, "epoch": 2.8438554779117435, "total_flos": 2713686644920135680, "step": 706900 }, { "loss": 2.9975, "learning_rate": 4.346455990363831e-06, "epoch": 2.844257777455938, "total_flos": 2714060763512279040, "step": 707000 }, { "loss": 2.9825, "learning_rate": 4.3456448649679e-06, "epoch": 2.8446600770001327, "total_flos": 2714435386672435200, "step": 707100 }, { "loss": 2.995, "learning_rate": 4.34483373957197e-06, "epoch": 2.8450623765443273, "total_flos": 2714802494424821760, "step": 707200 }, { "loss": 2.9475, "learning_rate": 4.344022614176039e-06, "epoch": 2.845464676088522, "total_flos": 2715183687591628800, "step": 707300 }, { "loss": 2.96, "learning_rate": 4.343211488780108e-06, "epoch": 2.845866975632717, "total_flos": 2715561800237936640, "step": 707400 }, { "loss": 2.935, "learning_rate": 4.342400363384178e-06, "epoch": 2.846269275176911, "total_flos": 2715955581048852480, "step": 707500 }, { "loss": 2.9975, "learning_rate": 4.341589237988248e-06, "epoch": 2.846671574721106, "total_flos": 2716343721320509440, "step": 707600 }, { "loss": 2.9625, "learning_rate": 4.340778112592317e-06, "epoch": 2.8470738742653006, "total_flos": 2716725158804459520, "step": 707700 }, { "loss": 2.9475, "learning_rate": 4.339966987196386e-06, "epoch": 2.847476173809495, "total_flos": 2717104211540643840, "step": 707800 }, { "loss": 2.985, "learning_rate": 4.339155861800455e-06, "epoch": 2.8478784733536897, "total_flos": 2717487932858757120, "step": 707900 }, { "loss": 2.9375, "learning_rate": 4.338344736404525e-06, "epoch": 2.8482807728978843, "total_flos": 2717863203990466560, "step": 708000 }, { "loss": 2.91, "learning_rate": 4.3375336110085945e-06, "epoch": 2.848683072442079, "total_flos": 2718241311325532160, "step": 708100 }, { "loss": 2.9575, "learning_rate": 4.336722485612664e-06, "epoch": 2.8490853719862734, "total_flos": 2718615631744880640, "step": 708200 }, { "loss": 2.9375, "learning_rate": 4.335911360216733e-06, "epoch": 2.849487671530468, "total_flos": 2718987678952550400, "step": 708300 }, { "loss": 3.0, "learning_rate": 4.335100234820803e-06, "epoch": 2.8498899710746626, "total_flos": 2719362790746992640, "step": 708400 }, { "loss": 2.91, "learning_rate": 4.334289109424872e-06, "epoch": 2.8502922706188576, "total_flos": 2719742672036966400, "step": 708500 }, { "loss": 2.97, "learning_rate": 4.333477984028941e-06, "epoch": 2.850694570163052, "total_flos": 2720129792550113280, "step": 708600 }, { "loss": 2.9575, "learning_rate": 4.3326668586330105e-06, "epoch": 2.8510968697072467, "total_flos": 2720508712505241600, "step": 708700 }, { "loss": 2.9875, "learning_rate": 4.3318557332370805e-06, "epoch": 2.8514991692514413, "total_flos": 2720886639258071040, "step": 708800 }, { "loss": 2.9725, "learning_rate": 4.33104460784115e-06, "epoch": 2.851901468795636, "total_flos": 2721270599582085120, "step": 708900 }, { "loss": 2.915, "learning_rate": 4.330233482445219e-06, "epoch": 2.8523037683398305, "total_flos": 2721648898121871360, "step": 709000 }, { "loss": 2.965, "learning_rate": 4.329422357049288e-06, "epoch": 2.852706067884025, "total_flos": 2722043778359930880, "step": 709100 }, { "loss": 2.965, "learning_rate": 4.328611231653358e-06, "epoch": 2.8531083674282196, "total_flos": 2722426697680465920, "step": 709200 }, { "loss": 2.945, "learning_rate": 4.327800106257427e-06, "epoch": 2.853510666972414, "total_flos": 2722825237364428800, "step": 709300 }, { "loss": 2.9525, "learning_rate": 4.3269889808614965e-06, "epoch": 2.853912966516609, "total_flos": 2723226889436344320, "step": 709400 }, { "loss": 2.9225, "learning_rate": 4.326177855465566e-06, "epoch": 2.8543152660608033, "total_flos": 2723619655799992320, "step": 709500 }, { "loss": 2.9875, "learning_rate": 4.325366730069636e-06, "epoch": 2.8547175656049983, "total_flos": 2724009431934259200, "step": 709600 }, { "loss": 2.935, "learning_rate": 4.324555604673705e-06, "epoch": 2.855119865149193, "total_flos": 2724402591329832960, "step": 709700 }, { "loss": 2.915, "learning_rate": 4.323744479277774e-06, "epoch": 2.8555221646933875, "total_flos": 2724785239777013760, "step": 709800 }, { "loss": 2.91, "learning_rate": 4.322933353881843e-06, "epoch": 2.855924464237582, "total_flos": 2725175775418920960, "step": 709900 }, { "loss": 2.92, "learning_rate": 4.322122228485913e-06, "epoch": 2.8563267637817766, "total_flos": 2725563565148590080, "step": 710000 }, { "loss": 2.9525, "learning_rate": 4.321311103089983e-06, "epoch": 2.856729063325971, "total_flos": 2725949363162419200, "step": 710100 }, { "loss": 2.9775, "learning_rate": 4.320499977694052e-06, "epoch": 2.857131362870166, "total_flos": 2726318579477975040, "step": 710200 }, { "loss": 2.9375, "learning_rate": 4.319688852298121e-06, "epoch": 2.8575336624143604, "total_flos": 2726702805364101120, "step": 710300 }, { "loss": 2.9475, "learning_rate": 4.318877726902191e-06, "epoch": 2.857935961958555, "total_flos": 2727095598283960320, "step": 710400 }, { "loss": 2.925, "learning_rate": 4.31806660150626e-06, "epoch": 2.85833826150275, "total_flos": 2727496501470720000, "step": 710500 }, { "loss": 2.9625, "learning_rate": 4.3172554761103294e-06, "epoch": 2.858740561046944, "total_flos": 2727889788336107520, "step": 710600 }, { "loss": 2.9125, "learning_rate": 4.316444350714399e-06, "epoch": 2.859142860591139, "total_flos": 2728280796678574080, "step": 710700 }, { "loss": 2.9825, "learning_rate": 4.315633225318469e-06, "epoch": 2.8595451601353337, "total_flos": 2728652742972641280, "step": 710800 }, { "loss": 2.9725, "learning_rate": 4.314822099922538e-06, "epoch": 2.8599474596795282, "total_flos": 2729050937425858560, "step": 710900 }, { "loss": 2.9725, "learning_rate": 4.314010974526607e-06, "epoch": 2.860349759223723, "total_flos": 2729422336661975040, "step": 711000 }, { "loss": 3.0025, "learning_rate": 4.313199849130676e-06, "epoch": 2.8607520587679174, "total_flos": 2729800104077537280, "step": 711100 }, { "loss": 2.9475, "learning_rate": 4.312388723734746e-06, "epoch": 2.861154358312112, "total_flos": 2730201655235850240, "step": 711200 }, { "loss": 2.9475, "learning_rate": 4.3115775983388155e-06, "epoch": 2.8615566578563065, "total_flos": 2730587554163281920, "step": 711300 }, { "loss": 2.9575, "learning_rate": 4.310766472942885e-06, "epoch": 2.861958957400501, "total_flos": 2730988298012774400, "step": 711400 }, { "loss": 2.9325, "learning_rate": 4.309955347546954e-06, "epoch": 2.8623612569446957, "total_flos": 2731371674100142080, "step": 711500 }, { "loss": 2.9975, "learning_rate": 4.309144222151024e-06, "epoch": 2.8627635564888907, "total_flos": 2731752426433843200, "step": 711600 }, { "loss": 2.9675, "learning_rate": 4.308333096755093e-06, "epoch": 2.8631658560330853, "total_flos": 2732123480439214080, "step": 711700 }, { "loss": 2.935, "learning_rate": 4.307521971359162e-06, "epoch": 2.86356815557728, "total_flos": 2732518206651248640, "step": 711800 }, { "loss": 2.975, "learning_rate": 4.306710845963232e-06, "epoch": 2.8639704551214744, "total_flos": 2732906054804582400, "step": 711900 }, { "loss": 2.9775, "learning_rate": 4.3058997205673016e-06, "epoch": 2.864372754665669, "total_flos": 2733286897429401600, "step": 712000 }, { "loss": 2.965, "learning_rate": 4.305088595171371e-06, "epoch": 2.8647750542098636, "total_flos": 2733679764706652160, "step": 712100 }, { "loss": 2.9625, "learning_rate": 4.30427746977544e-06, "epoch": 2.865177353754058, "total_flos": 2734058084491407360, "step": 712200 }, { "loss": 2.95, "learning_rate": 4.30346634437951e-06, "epoch": 2.8655796532982527, "total_flos": 2734455763754127360, "step": 712300 }, { "loss": 2.975, "learning_rate": 4.302655218983579e-06, "epoch": 2.8659819528424473, "total_flos": 2734832309583974400, "step": 712400 }, { "loss": 2.955, "learning_rate": 4.301844093587648e-06, "epoch": 2.8663842523866423, "total_flos": 2735210804639723520, "step": 712500 }, { "loss": 2.9425, "learning_rate": 4.301032968191718e-06, "epoch": 2.8667865519308364, "total_flos": 2735582219809566720, "step": 712600 }, { "loss": 3.005, "learning_rate": 4.300221842795788e-06, "epoch": 2.8671888514750314, "total_flos": 2735974901193338880, "step": 712700 }, { "loss": 2.9875, "learning_rate": 4.299410717399857e-06, "epoch": 2.867591151019226, "total_flos": 2736349396883681280, "step": 712800 }, { "loss": 2.945, "learning_rate": 4.298599592003926e-06, "epoch": 2.8679934505634206, "total_flos": 2736739815678259200, "step": 712900 }, { "loss": 2.91, "learning_rate": 4.297788466607995e-06, "epoch": 2.868395750107615, "total_flos": 2737122177318359040, "step": 713000 }, { "loss": 2.9375, "learning_rate": 4.296977341212065e-06, "epoch": 2.8687980496518097, "total_flos": 2737502690646159360, "step": 713100 }, { "loss": 2.9575, "learning_rate": 4.2961662158161345e-06, "epoch": 2.8692003491960043, "total_flos": 2737885652456632320, "step": 713200 }, { "loss": 2.9875, "learning_rate": 4.295355090420204e-06, "epoch": 2.869602648740199, "total_flos": 2738255298982809600, "step": 713300 }, { "loss": 2.93, "learning_rate": 4.294543965024273e-06, "epoch": 2.8700049482843935, "total_flos": 2738644692707635200, "step": 713400 }, { "loss": 3.03, "learning_rate": 4.293732839628343e-06, "epoch": 2.870407247828588, "total_flos": 2739027920080220160, "step": 713500 }, { "loss": 2.9425, "learning_rate": 4.292921714232412e-06, "epoch": 2.870809547372783, "total_flos": 2739411827291811840, "step": 713600 }, { "loss": 2.9675, "learning_rate": 4.292110588836481e-06, "epoch": 2.871211846916977, "total_flos": 2739798793778933760, "step": 713700 }, { "loss": 2.945, "learning_rate": 4.2912994634405505e-06, "epoch": 2.871614146461172, "total_flos": 2740186567574876160, "step": 713800 }, { "loss": 2.975, "learning_rate": 4.2904883380446205e-06, "epoch": 2.8720164460053668, "total_flos": 2740582892470824960, "step": 713900 }, { "loss": 2.9725, "learning_rate": 4.28967721264869e-06, "epoch": 2.8724187455495613, "total_flos": 2740977188472238080, "step": 714000 }, { "loss": 2.955, "learning_rate": 4.288866087252759e-06, "epoch": 2.872821045093756, "total_flos": 2741352342756618240, "step": 714100 }, { "loss": 2.9725, "learning_rate": 4.288054961856828e-06, "epoch": 2.8732233446379505, "total_flos": 2741767830614568960, "step": 714200 }, { "loss": 2.985, "learning_rate": 4.287243836460898e-06, "epoch": 2.873625644182145, "total_flos": 2742145613963857920, "step": 714300 }, { "loss": 2.9425, "learning_rate": 4.286432711064967e-06, "epoch": 2.8740279437263396, "total_flos": 2742520407083765760, "step": 714400 }, { "loss": 2.955, "learning_rate": 4.285621585669037e-06, "epoch": 2.8744302432705346, "total_flos": 2742909965457100800, "step": 714500 }, { "loss": 2.9475, "learning_rate": 4.284810460273106e-06, "epoch": 2.8748325428147288, "total_flos": 2743284349611356160, "step": 714600 }, { "loss": 2.9425, "learning_rate": 4.283999334877176e-06, "epoch": 2.875234842358924, "total_flos": 2743664485840957440, "step": 714700 }, { "loss": 2.96, "learning_rate": 4.283188209481245e-06, "epoch": 2.8756371419031184, "total_flos": 2744051319547023360, "step": 714800 }, { "loss": 2.925, "learning_rate": 4.282377084085315e-06, "epoch": 2.876039441447313, "total_flos": 2744429697755443200, "step": 714900 }, { "loss": 3.0, "learning_rate": 4.281565958689383e-06, "epoch": 2.8764417409915075, "total_flos": 2744801898989137920, "step": 715000 }, { "loss": 2.9325, "learning_rate": 4.280754833293453e-06, "epoch": 2.876844040535702, "total_flos": 2745183612657684480, "step": 715100 }, { "loss": 2.9125, "learning_rate": 4.279943707897523e-06, "epoch": 2.8772463400798967, "total_flos": 2745573314434560000, "step": 715200 }, { "loss": 2.945, "learning_rate": 4.279132582501593e-06, "epoch": 2.8776486396240912, "total_flos": 2745946073348689920, "step": 715300 }, { "loss": 2.985, "learning_rate": 4.278321457105661e-06, "epoch": 2.878050939168286, "total_flos": 2746334139262955520, "step": 715400 }, { "loss": 2.9575, "learning_rate": 4.277510331709731e-06, "epoch": 2.8784532387124804, "total_flos": 2746703764544163840, "step": 715500 }, { "loss": 2.9975, "learning_rate": 4.2766992063138e-06, "epoch": 2.8788555382566754, "total_flos": 2747083359027056640, "step": 715600 }, { "loss": 2.96, "learning_rate": 4.27588808091787e-06, "epoch": 2.8792578378008695, "total_flos": 2747462921642496000, "step": 715700 }, { "loss": 2.9525, "learning_rate": 4.275076955521939e-06, "epoch": 2.8796601373450645, "total_flos": 2747844183855452160, "step": 715800 }, { "loss": 2.9125, "learning_rate": 4.274265830126009e-06, "epoch": 2.880062436889259, "total_flos": 2748221478570455040, "step": 715900 }, { "loss": 2.89, "learning_rate": 4.273454704730078e-06, "epoch": 2.8804647364334537, "total_flos": 2748609061161676800, "step": 716000 }, { "loss": 2.94, "learning_rate": 4.272643579334148e-06, "epoch": 2.8808670359776483, "total_flos": 2748993148955504640, "step": 716100 }, { "loss": 2.915, "learning_rate": 4.271832453938216e-06, "epoch": 2.881269335521843, "total_flos": 2749371022595911680, "step": 716200 }, { "loss": 2.9375, "learning_rate": 4.271021328542286e-06, "epoch": 2.8816716350660374, "total_flos": 2749771777067888640, "step": 716300 }, { "loss": 2.9075, "learning_rate": 4.270210203146356e-06, "epoch": 2.882073934610232, "total_flos": 2750166200539115520, "step": 716400 }, { "loss": 2.9325, "learning_rate": 4.2693990777504255e-06, "epoch": 2.8824762341544266, "total_flos": 2750546294278778880, "step": 716500 }, { "loss": 2.93, "learning_rate": 4.268587952354495e-06, "epoch": 2.882878533698621, "total_flos": 2750909514201845760, "step": 716600 }, { "loss": 2.9475, "learning_rate": 4.267776826958564e-06, "epoch": 2.883280833242816, "total_flos": 2751286299037593600, "step": 716700 }, { "loss": 2.9375, "learning_rate": 4.266965701562634e-06, "epoch": 2.8836831327870107, "total_flos": 2751678592700682240, "step": 716800 }, { "loss": 2.9825, "learning_rate": 4.266154576166703e-06, "epoch": 2.8840854323312053, "total_flos": 2752071194415820800, "step": 716900 }, { "loss": 2.96, "learning_rate": 4.265343450770772e-06, "epoch": 2.8844877318754, "total_flos": 2752469957171957760, "step": 717000 }, { "loss": 2.9625, "learning_rate": 4.2645323253748416e-06, "epoch": 2.8848900314195944, "total_flos": 2752861666598400000, "step": 717100 }, { "loss": 2.92, "learning_rate": 4.263721199978912e-06, "epoch": 2.885292330963789, "total_flos": 2753268656468766720, "step": 717200 }, { "loss": 2.87, "learning_rate": 4.262910074582981e-06, "epoch": 2.8856946305079836, "total_flos": 2753643279628922880, "step": 717300 }, { "loss": 2.935, "learning_rate": 4.26209894918705e-06, "epoch": 2.886096930052178, "total_flos": 2754035727318036480, "step": 717400 }, { "loss": 2.9725, "learning_rate": 4.261287823791119e-06, "epoch": 2.8864992295963727, "total_flos": 2754418444811366400, "step": 717500 }, { "loss": 2.915, "learning_rate": 4.260476698395189e-06, "epoch": 2.8869015291405677, "total_flos": 2754825546217820160, "step": 717600 }, { "loss": 2.96, "learning_rate": 4.2596655729992584e-06, "epoch": 2.887303828684762, "total_flos": 2755195840715550720, "step": 717700 }, { "loss": 2.9425, "learning_rate": 4.258854447603328e-06, "epoch": 2.887706128228957, "total_flos": 2755584432442798080, "step": 717800 }, { "loss": 2.91, "learning_rate": 4.258043322207397e-06, "epoch": 2.8881084277731515, "total_flos": 2755978792179118080, "step": 717900 }, { "loss": 2.925, "learning_rate": 4.257232196811467e-06, "epoch": 2.888510727317346, "total_flos": 2756358609734184960, "step": 718000 }, { "loss": 2.975, "learning_rate": 4.256421071415536e-06, "epoch": 2.8889130268615406, "total_flos": 2756745523108884480, "step": 718100 }, { "loss": 2.9325, "learning_rate": 4.255609946019605e-06, "epoch": 2.889315326405735, "total_flos": 2757155444784967680, "step": 718200 }, { "loss": 2.96, "learning_rate": 4.2547988206236745e-06, "epoch": 2.8897176259499298, "total_flos": 2757541688943144960, "step": 718300 }, { "loss": 2.9075, "learning_rate": 4.2539876952277445e-06, "epoch": 2.8901199254941243, "total_flos": 2757918893367029760, "step": 718400 }, { "loss": 2.88, "learning_rate": 4.253176569831814e-06, "epoch": 2.890522225038319, "total_flos": 2758281672456990720, "step": 718500 }, { "loss": 2.93, "learning_rate": 4.252365444435883e-06, "epoch": 2.8909245245825135, "total_flos": 2758657336620625920, "step": 718600 }, { "loss": 2.965, "learning_rate": 4.251554319039952e-06, "epoch": 2.8913268241267085, "total_flos": 2759046905616445440, "step": 718700 }, { "loss": 2.9275, "learning_rate": 4.250743193644022e-06, "epoch": 2.8917291236709026, "total_flos": 2759430775649341440, "step": 718800 }, { "loss": 2.925, "learning_rate": 4.249932068248091e-06, "epoch": 2.8921314232150976, "total_flos": 2759822113288826880, "step": 718900 }, { "loss": 2.975, "learning_rate": 4.2491209428521605e-06, "epoch": 2.892533722759292, "total_flos": 2760211581371043840, "step": 719000 }, { "loss": 2.9325, "learning_rate": 4.24830981745623e-06, "epoch": 2.892936022303487, "total_flos": 2760608522371092480, "step": 719100 }, { "loss": 2.98, "learning_rate": 4.2474986920603e-06, "epoch": 2.8933383218476814, "total_flos": 2760989094122557440, "step": 719200 }, { "loss": 2.92, "learning_rate": 4.246687566664369e-06, "epoch": 2.893740621391876, "total_flos": 2761369984548556800, "step": 719300 }, { "loss": 2.98, "learning_rate": 4.245876441268438e-06, "epoch": 2.8941429209360705, "total_flos": 2761753243788595200, "step": 719400 }, { "loss": 2.9425, "learning_rate": 4.245065315872507e-06, "epoch": 2.894545220480265, "total_flos": 2762140194341990400, "step": 719500 }, { "loss": 2.95, "learning_rate": 4.244254190476577e-06, "epoch": 2.8949475200244597, "total_flos": 2762534415986012160, "step": 719600 }, { "loss": 2.945, "learning_rate": 4.243443065080647e-06, "epoch": 2.8953498195686542, "total_flos": 2762931441965936640, "step": 719700 }, { "loss": 2.935, "learning_rate": 4.242631939684716e-06, "epoch": 2.8957521191128492, "total_flos": 2763302575639941120, "step": 719800 }, { "loss": 2.945, "learning_rate": 4.241820814288785e-06, "epoch": 2.896154418657044, "total_flos": 2763687183935508480, "step": 719900 }, { "loss": 2.9225, "learning_rate": 4.241009688892855e-06, "epoch": 2.8965567182012384, "total_flos": 2764082165087170560, "step": 720000 }, { "loss": 2.905, "learning_rate": 4.240198563496924e-06, "epoch": 2.896959017745433, "total_flos": 2764458806519377920, "step": 720100 }, { "loss": 2.9675, "learning_rate": 4.239387438100993e-06, "epoch": 2.8973613172896275, "total_flos": 2764830837793320960, "step": 720200 }, { "loss": 2.9275, "learning_rate": 4.238576312705063e-06, "epoch": 2.897763616833822, "total_flos": 2765216051570503680, "step": 720300 }, { "loss": 2.9375, "learning_rate": 4.237765187309133e-06, "epoch": 2.8981659163780167, "total_flos": 2765596124065198080, "step": 720400 }, { "loss": 2.9325, "learning_rate": 4.236954061913202e-06, "epoch": 2.8985682159222113, "total_flos": 2765979611688652800, "step": 720500 }, { "loss": 2.9325, "learning_rate": 4.236142936517271e-06, "epoch": 2.898970515466406, "total_flos": 2766373100381245440, "step": 720600 }, { "loss": 2.9675, "learning_rate": 4.23533181112134e-06, "epoch": 2.899372815010601, "total_flos": 2766763280169922560, "step": 720700 }, { "loss": 2.89, "learning_rate": 4.23452068572541e-06, "epoch": 2.899775114554795, "total_flos": 2767148722330521600, "step": 720800 }, { "loss": 2.955, "learning_rate": 4.2337095603294795e-06, "epoch": 2.90017741409899, "total_flos": 2767532289622609920, "step": 720900 }, { "loss": 2.9325, "learning_rate": 4.232898434933549e-06, "epoch": 2.9005797136431846, "total_flos": 2767906716266803200, "step": 721000 }, { "loss": 2.8925, "learning_rate": 4.232087309537618e-06, "epoch": 2.900982013187379, "total_flos": 2768284334967582720, "step": 721100 }, { "loss": 2.91, "learning_rate": 4.231276184141688e-06, "epoch": 2.9013843127315737, "total_flos": 2768681530907258880, "step": 721200 }, { "loss": 2.92, "learning_rate": 4.230465058745757e-06, "epoch": 2.9017866122757683, "total_flos": 2769049238830018560, "step": 721300 }, { "loss": 2.905, "learning_rate": 4.229653933349826e-06, "epoch": 2.902188911819963, "total_flos": 2769446020492800000, "step": 721400 }, { "loss": 2.9075, "learning_rate": 4.228842807953896e-06, "epoch": 2.9025912113641574, "total_flos": 2769832583325511680, "step": 721500 }, { "loss": 2.925, "learning_rate": 4.2280316825579655e-06, "epoch": 2.902993510908352, "total_flos": 2770217494361886720, "step": 721600 }, { "loss": 2.955, "learning_rate": 4.227220557162035e-06, "epoch": 2.9033958104525466, "total_flos": 2770585664362721280, "step": 721700 }, { "loss": 2.9725, "learning_rate": 4.226409431766104e-06, "epoch": 2.9037981099967416, "total_flos": 2770977623417548800, "step": 721800 }, { "loss": 2.975, "learning_rate": 4.225598306370174e-06, "epoch": 2.9042004095409357, "total_flos": 2771359029034045440, "step": 721900 }, { "loss": 2.995, "learning_rate": 4.224787180974243e-06, "epoch": 2.9046027090851307, "total_flos": 2771738023346565120, "step": 722000 }, { "loss": 2.9525, "learning_rate": 4.223976055578312e-06, "epoch": 2.9050050086293253, "total_flos": 2772130625061703680, "step": 722100 }, { "loss": 2.9125, "learning_rate": 4.2231649301823816e-06, "epoch": 2.90540730817352, "total_flos": 2772520863274045440, "step": 722200 }, { "loss": 3.0175, "learning_rate": 4.222353804786452e-06, "epoch": 2.9058096077177145, "total_flos": 2772907436729241600, "step": 722300 }, { "loss": 2.9775, "learning_rate": 4.221542679390521e-06, "epoch": 2.906211907261909, "total_flos": 2773293994250711040, "step": 722400 }, { "loss": 2.9525, "learning_rate": 4.22073155399459e-06, "epoch": 2.9066142068061036, "total_flos": 2773682708136529920, "step": 722500 }, { "loss": 2.9825, "learning_rate": 4.219920428598659e-06, "epoch": 2.907016506350298, "total_flos": 2774057033867120640, "step": 722600 }, { "loss": 2.96, "learning_rate": 4.219109303202729e-06, "epoch": 2.907418805894493, "total_flos": 2774465447150407680, "step": 722700 }, { "loss": 2.915, "learning_rate": 4.2182981778067984e-06, "epoch": 2.9078211054386873, "total_flos": 2774848732946657280, "step": 722800 }, { "loss": 2.97, "learning_rate": 4.217487052410868e-06, "epoch": 2.9082234049828823, "total_flos": 2775238498458439680, "step": 722900 }, { "loss": 2.9375, "learning_rate": 4.216675927014937e-06, "epoch": 2.908625704527077, "total_flos": 2775627844382085120, "step": 723000 }, { "loss": 2.935, "learning_rate": 4.215864801619007e-06, "epoch": 2.9090280040712715, "total_flos": 2776018996128092160, "step": 723100 }, { "loss": 2.9575, "learning_rate": 4.215053676223076e-06, "epoch": 2.909430303615466, "total_flos": 2776379895038300160, "step": 723200 }, { "loss": 2.9325, "learning_rate": 4.214242550827145e-06, "epoch": 2.9098326031596606, "total_flos": 2776772794183004160, "step": 723300 }, { "loss": 2.945, "learning_rate": 4.2134314254312145e-06, "epoch": 2.910234902703855, "total_flos": 2777164620456775680, "step": 723400 }, { "loss": 2.9275, "learning_rate": 4.2126203000352845e-06, "epoch": 2.91063720224805, "total_flos": 2777562974247260160, "step": 723500 }, { "loss": 2.9825, "learning_rate": 4.211809174639354e-06, "epoch": 2.9110395017922444, "total_flos": 2777946796478976000, "step": 723600 }, { "loss": 2.9025, "learning_rate": 4.210998049243423e-06, "epoch": 2.911441801336439, "total_flos": 2778328382677708800, "step": 723700 }, { "loss": 3.0025, "learning_rate": 4.210186923847492e-06, "epoch": 2.911844100880634, "total_flos": 2778720198328995840, "step": 723800 }, { "loss": 2.94, "learning_rate": 4.209375798451562e-06, "epoch": 2.912246400424828, "total_flos": 2779099033304248320, "step": 723900 }, { "loss": 2.955, "learning_rate": 4.208564673055631e-06, "epoch": 2.912648699969023, "total_flos": 2779489829197025280, "step": 724000 }, { "loss": 2.985, "learning_rate": 4.2077535476597005e-06, "epoch": 2.9130509995132177, "total_flos": 2779873832010977280, "step": 724100 }, { "loss": 2.8925, "learning_rate": 4.20694242226377e-06, "epoch": 2.9134532990574122, "total_flos": 2780239888137400320, "step": 724200 }, { "loss": 2.92, "learning_rate": 4.20613129686784e-06, "epoch": 2.913855598601607, "total_flos": 2780626732465950720, "step": 724300 }, { "loss": 2.93, "learning_rate": 4.205320171471909e-06, "epoch": 2.9142578981458014, "total_flos": 2781000404913745920, "step": 724400 }, { "loss": 2.945, "learning_rate": 4.204509046075978e-06, "epoch": 2.914660197689996, "total_flos": 2781370088618618880, "step": 724500 }, { "loss": 2.9025, "learning_rate": 4.203697920680047e-06, "epoch": 2.9150624972341905, "total_flos": 2781770301343887360, "step": 724600 }, { "loss": 2.9925, "learning_rate": 4.202886795284117e-06, "epoch": 2.915464796778385, "total_flos": 2782163009283870720, "step": 724700 }, { "loss": 2.98, "learning_rate": 4.202075669888187e-06, "epoch": 2.9158670963225797, "total_flos": 2782538742493655040, "step": 724800 }, { "loss": 2.95, "learning_rate": 4.201264544492256e-06, "epoch": 2.9162693958667747, "total_flos": 2782940091824762880, "step": 724900 }, { "loss": 2.97, "learning_rate": 4.200453419096325e-06, "epoch": 2.916671695410969, "total_flos": 2783329363391016960, "step": 725000 }, { "loss": 2.9325, "learning_rate": 4.199642293700395e-06, "epoch": 2.917073994955164, "total_flos": 2783702727786762240, "step": 725100 }, { "loss": 2.9675, "learning_rate": 4.198831168304464e-06, "epoch": 2.9174762944993584, "total_flos": 2784089465890467840, "step": 725200 }, { "loss": 2.9225, "learning_rate": 4.198020042908533e-06, "epoch": 2.917878594043553, "total_flos": 2784473851113861120, "step": 725300 }, { "loss": 2.9725, "learning_rate": 4.197208917512603e-06, "epoch": 2.9182808935877476, "total_flos": 2784861794869555200, "step": 725400 }, { "loss": 2.9525, "learning_rate": 4.196397792116673e-06, "epoch": 2.918683193131942, "total_flos": 2785246201337917440, "step": 725500 }, { "loss": 2.9175, "learning_rate": 4.195586666720742e-06, "epoch": 2.9190854926761367, "total_flos": 2785616814510182400, "step": 725600 }, { "loss": 2.9625, "learning_rate": 4.194775541324811e-06, "epoch": 2.9194877922203313, "total_flos": 2785998963700592640, "step": 725700 }, { "loss": 2.9675, "learning_rate": 4.19396441592888e-06, "epoch": 2.9198900917645263, "total_flos": 2786377543736217600, "step": 725800 }, { "loss": 2.975, "learning_rate": 4.19315329053295e-06, "epoch": 2.9202923913087204, "total_flos": 2786768355562721280, "step": 725900 }, { "loss": 2.98, "learning_rate": 4.19234216513702e-06, "epoch": 2.9206946908529154, "total_flos": 2787159868473200640, "step": 726000 }, { "loss": 2.94, "learning_rate": 4.1915310397410895e-06, "epoch": 2.92109699039711, "total_flos": 2787555534775111680, "step": 726100 }, { "loss": 2.9375, "learning_rate": 4.190719914345159e-06, "epoch": 2.9214992899413046, "total_flos": 2787936781054341120, "step": 726200 }, { "loss": 2.9675, "learning_rate": 4.189908788949228e-06, "epoch": 2.921901589485499, "total_flos": 2788321394661150720, "step": 726300 }, { "loss": 2.93, "learning_rate": 4.189097663553298e-06, "epoch": 2.9223038890296937, "total_flos": 2788725766089093120, "step": 726400 }, { "loss": 2.945, "learning_rate": 4.188286538157367e-06, "epoch": 2.9227061885738883, "total_flos": 2789122924850073600, "step": 726500 }, { "loss": 2.91, "learning_rate": 4.187475412761436e-06, "epoch": 2.923108488118083, "total_flos": 2789509806357319680, "step": 726600 }, { "loss": 2.8675, "learning_rate": 4.1866642873655055e-06, "epoch": 2.9235107876622775, "total_flos": 2789894993578291200, "step": 726700 }, { "loss": 2.9325, "learning_rate": 4.185853161969576e-06, "epoch": 2.923913087206472, "total_flos": 2790266663687761920, "step": 726800 }, { "loss": 2.94, "learning_rate": 4.185042036573645e-06, "epoch": 2.924315386750667, "total_flos": 2790651394141900800, "step": 726900 }, { "loss": 2.945, "learning_rate": 4.184230911177714e-06, "epoch": 2.924717686294861, "total_flos": 2791031036425973760, "step": 727000 }, { "loss": 2.945, "learning_rate": 4.183419785781783e-06, "epoch": 2.925119985839056, "total_flos": 2791409945758617600, "step": 727100 }, { "loss": 2.93, "learning_rate": 4.182608660385853e-06, "epoch": 2.9255222853832508, "total_flos": 2791791808141946880, "step": 727200 }, { "loss": 2.9175, "learning_rate": 4.181797534989922e-06, "epoch": 2.9259245849274453, "total_flos": 2792180893814722560, "step": 727300 }, { "loss": 2.9475, "learning_rate": 4.180986409593992e-06, "epoch": 2.92632688447164, "total_flos": 2792580277986201600, "step": 727400 }, { "loss": 2.905, "learning_rate": 4.180175284198061e-06, "epoch": 2.9267291840158345, "total_flos": 2792955007371202560, "step": 727500 }, { "loss": 2.9475, "learning_rate": 4.179364158802131e-06, "epoch": 2.927131483560029, "total_flos": 2793338240055029760, "step": 727600 }, { "loss": 2.9425, "learning_rate": 4.1785530334062e-06, "epoch": 2.9275337831042236, "total_flos": 2793709288749158400, "step": 727700 }, { "loss": 3.0425, "learning_rate": 4.177741908010269e-06, "epoch": 2.927936082648418, "total_flos": 2794090067639070720, "step": 727800 }, { "loss": 2.915, "learning_rate": 4.1769307826143384e-06, "epoch": 2.9283383821926128, "total_flos": 2794477262509608960, "step": 727900 }, { "loss": 2.96, "learning_rate": 4.1761196572184085e-06, "epoch": 2.928740681736808, "total_flos": 2794868111514808320, "step": 728000 }, { "loss": 2.94, "learning_rate": 4.175308531822478e-06, "epoch": 2.9291429812810024, "total_flos": 2795251105192734720, "step": 728100 }, { "loss": 2.9325, "learning_rate": 4.174497406426547e-06, "epoch": 2.929545280825197, "total_flos": 2795640902571970560, "step": 728200 }, { "loss": 2.92, "learning_rate": 4.173686281030616e-06, "epoch": 2.9299475803693915, "total_flos": 2796026174772817920, "step": 728300 }, { "loss": 2.965, "learning_rate": 4.172875155634686e-06, "epoch": 2.930349879913586, "total_flos": 2796410097918136320, "step": 728400 }, { "loss": 2.9525, "learning_rate": 4.172064030238755e-06, "epoch": 2.9307521794577807, "total_flos": 2796776164667043840, "step": 728500 }, { "loss": 2.9725, "learning_rate": 4.1712529048428245e-06, "epoch": 2.9311544790019752, "total_flos": 2797167018983485440, "step": 728600 }, { "loss": 2.9725, "learning_rate": 4.170441779446894e-06, "epoch": 2.93155677854617, "total_flos": 2797557060679864320, "step": 728700 }, { "loss": 2.855, "learning_rate": 4.169630654050964e-06, "epoch": 2.9319590780903644, "total_flos": 2797926261061693440, "step": 728800 }, { "loss": 2.9125, "learning_rate": 4.168819528655033e-06, "epoch": 2.9323613776345594, "total_flos": 2798316717034967040, "step": 728900 }, { "loss": 2.93, "learning_rate": 4.168008403259102e-06, "epoch": 2.9327636771787535, "total_flos": 2798695472341585920, "step": 729000 }, { "loss": 2.93, "learning_rate": 4.167197277863171e-06, "epoch": 2.9331659767229485, "total_flos": 2799087224257966080, "step": 729100 }, { "loss": 2.955, "learning_rate": 4.166386152467241e-06, "epoch": 2.933568276267143, "total_flos": 2799481817688944640, "step": 729200 }, { "loss": 2.94, "learning_rate": 4.1655750270713106e-06, "epoch": 2.9339705758113377, "total_flos": 2799864593605939200, "step": 729300 }, { "loss": 2.92, "learning_rate": 4.16476390167538e-06, "epoch": 2.9343728753555323, "total_flos": 2800244267757465600, "step": 729400 }, { "loss": 2.925, "learning_rate": 4.163952776279449e-06, "epoch": 2.934775174899727, "total_flos": 2800629141615144960, "step": 729500 }, { "loss": 2.88, "learning_rate": 4.163141650883519e-06, "epoch": 2.9351774744439214, "total_flos": 2801015364528353280, "step": 729600 }, { "loss": 2.93, "learning_rate": 4.162330525487588e-06, "epoch": 2.935579773988116, "total_flos": 2801390423210373120, "step": 729700 }, { "loss": 2.9275, "learning_rate": 4.161519400091657e-06, "epoch": 2.9359820735323106, "total_flos": 2801769789309849600, "step": 729800 }, { "loss": 2.9625, "learning_rate": 4.160708274695727e-06, "epoch": 2.936384373076505, "total_flos": 2802153170708459520, "step": 729900 }, { "loss": 2.92, "learning_rate": 4.159897149299797e-06, "epoch": 2.9367866726207, "total_flos": 2802532696145203200, "step": 730000 }, { "loss": 2.935, "learning_rate": 4.159086023903866e-06, "epoch": 2.9371889721648943, "total_flos": 2802918260464373760, "step": 730100 }, { "loss": 2.95, "learning_rate": 4.158274898507935e-06, "epoch": 2.9375912717090893, "total_flos": 2803299963510435840, "step": 730200 }, { "loss": 2.955, "learning_rate": 4.157463773112004e-06, "epoch": 2.937993571253284, "total_flos": 2803687636392775680, "step": 730300 }, { "loss": 2.8875, "learning_rate": 4.156652647716074e-06, "epoch": 2.9383958707974784, "total_flos": 2804079521090211840, "step": 730400 }, { "loss": 2.9675, "learning_rate": 4.1558415223201435e-06, "epoch": 2.938798170341673, "total_flos": 2804462663482920960, "step": 730500 }, { "loss": 2.9325, "learning_rate": 4.155030396924213e-06, "epoch": 2.9392004698858676, "total_flos": 2804860395858063360, "step": 730600 }, { "loss": 2.9475, "learning_rate": 4.154219271528283e-06, "epoch": 2.939602769430062, "total_flos": 2805244499585617920, "step": 730700 }, { "loss": 2.915, "learning_rate": 4.153408146132352e-06, "epoch": 2.9400050689742567, "total_flos": 2805631864415907840, "step": 730800 }, { "loss": 3.0, "learning_rate": 4.152597020736421e-06, "epoch": 2.9404073685184513, "total_flos": 2806014640332902400, "step": 730900 }, { "loss": 2.9725, "learning_rate": 4.15178589534049e-06, "epoch": 2.940809668062646, "total_flos": 2806396460226293760, "step": 731000 }, { "loss": 2.9075, "learning_rate": 4.15097476994456e-06, "epoch": 2.941211967606841, "total_flos": 2806781477487513600, "step": 731100 }, { "loss": 2.9225, "learning_rate": 4.1501636445486295e-06, "epoch": 2.9416142671510355, "total_flos": 2807174934312652800, "step": 731200 }, { "loss": 2.91, "learning_rate": 4.149352519152699e-06, "epoch": 2.94201656669523, "total_flos": 2807546567243427840, "step": 731300 }, { "loss": 2.93, "learning_rate": 4.148541393756768e-06, "epoch": 2.9424188662394246, "total_flos": 2807925232258928640, "step": 731400 }, { "loss": 2.945, "learning_rate": 4.147730268360838e-06, "epoch": 2.942821165783619, "total_flos": 2808314371044126720, "step": 731500 }, { "loss": 2.9675, "learning_rate": 4.146919142964907e-06, "epoch": 2.9432234653278138, "total_flos": 2808712225577840640, "step": 731600 }, { "loss": 2.92, "learning_rate": 4.146108017568976e-06, "epoch": 2.9436257648720083, "total_flos": 2809098156372725760, "step": 731700 }, { "loss": 2.9575, "learning_rate": 4.1452968921730455e-06, "epoch": 2.944028064416203, "total_flos": 2809480772952453120, "step": 731800 }, { "loss": 2.915, "learning_rate": 4.144485766777116e-06, "epoch": 2.9444303639603975, "total_flos": 2809875387628400640, "step": 731900 }, { "loss": 2.9, "learning_rate": 4.143674641381185e-06, "epoch": 2.9448326635045925, "total_flos": 2810254196047441920, "step": 732000 }, { "loss": 2.91, "learning_rate": 4.142863515985254e-06, "epoch": 2.9452349630487866, "total_flos": 2810643637573447680, "step": 732100 }, { "loss": 2.9525, "learning_rate": 4.142052390589323e-06, "epoch": 2.9456372625929816, "total_flos": 2811014760624967680, "step": 732200 }, { "loss": 2.9025, "learning_rate": 4.141241265193393e-06, "epoch": 2.946039562137176, "total_flos": 2811375569244057600, "step": 732300 }, { "loss": 2.955, "learning_rate": 4.140430139797462e-06, "epoch": 2.946441861681371, "total_flos": 2811773062613299200, "step": 732400 }, { "loss": 2.9225, "learning_rate": 4.139619014401532e-06, "epoch": 2.9468441612255654, "total_flos": 2812154308892528640, "step": 732500 }, { "loss": 2.985, "learning_rate": 4.138807889005601e-06, "epoch": 2.94724646076976, "total_flos": 2812555987520655360, "step": 732600 }, { "loss": 2.9675, "learning_rate": 4.137996763609671e-06, "epoch": 2.9476487603139545, "total_flos": 2812957235938160640, "step": 732700 }, { "loss": 2.915, "learning_rate": 4.13718563821374e-06, "epoch": 2.948051059858149, "total_flos": 2813351898415288320, "step": 732800 }, { "loss": 2.935, "learning_rate": 4.136374512817809e-06, "epoch": 2.9484533594023437, "total_flos": 2813747883391733760, "step": 732900 }, { "loss": 2.965, "learning_rate": 4.1355633874218784e-06, "epoch": 2.9488556589465382, "total_flos": 2814140984363642880, "step": 733000 }, { "loss": 2.995, "learning_rate": 4.1347522620259485e-06, "epoch": 2.9492579584907332, "total_flos": 2814527988029460480, "step": 733100 }, { "loss": 2.9075, "learning_rate": 4.133941136630018e-06, "epoch": 2.9496602580349274, "total_flos": 2814913302720245760, "step": 733200 }, { "loss": 2.9775, "learning_rate": 4.133130011234087e-06, "epoch": 2.9500625575791224, "total_flos": 2815287208862699520, "step": 733300 }, { "loss": 2.965, "learning_rate": 4.132318885838156e-06, "epoch": 2.950464857123317, "total_flos": 2815687830553620480, "step": 733400 }, { "loss": 2.9325, "learning_rate": 4.131507760442226e-06, "epoch": 2.9508671566675115, "total_flos": 2816077107431116800, "step": 733500 }, { "loss": 2.9625, "learning_rate": 4.130696635046295e-06, "epoch": 2.951269456211706, "total_flos": 2816468827480043520, "step": 733600 }, { "loss": 2.9675, "learning_rate": 4.1298855096503645e-06, "epoch": 2.9516717557559007, "total_flos": 2816870522041896960, "step": 733700 }, { "loss": 2.9325, "learning_rate": 4.129074384254434e-06, "epoch": 2.9520740553000953, "total_flos": 2817263713304924160, "step": 733800 }, { "loss": 2.9425, "learning_rate": 4.128263258858504e-06, "epoch": 2.95247635484429, "total_flos": 2817654636667514880, "step": 733900 }, { "loss": 2.905, "learning_rate": 4.127452133462573e-06, "epoch": 2.952878654388485, "total_flos": 2818055587655454720, "step": 734000 }, { "loss": 2.91, "learning_rate": 4.126641008066642e-06, "epoch": 2.953280953932679, "total_flos": 2818436648041205760, "step": 734100 }, { "loss": 2.945, "learning_rate": 4.125829882670711e-06, "epoch": 2.953683253476874, "total_flos": 2818803192801914880, "step": 734200 }, { "loss": 2.87, "learning_rate": 4.125018757274781e-06, "epoch": 2.9540855530210686, "total_flos": 2819197106393886720, "step": 734300 }, { "loss": 2.94, "learning_rate": 4.1242076318788506e-06, "epoch": 2.954487852565263, "total_flos": 2819572988318453760, "step": 734400 }, { "loss": 2.8825, "learning_rate": 4.12339650648292e-06, "epoch": 2.9548901521094577, "total_flos": 2819965366961418240, "step": 734500 }, { "loss": 2.91, "learning_rate": 4.122585381086989e-06, "epoch": 2.9552924516536523, "total_flos": 2820333802524364800, "step": 734600 }, { "loss": 2.945, "learning_rate": 4.121774255691059e-06, "epoch": 2.955694751197847, "total_flos": 2820717507908751360, "step": 734700 }, { "loss": 2.8875, "learning_rate": 4.120963130295128e-06, "epoch": 2.9560970507420414, "total_flos": 2821104378793512960, "step": 734800 }, { "loss": 2.9025, "learning_rate": 4.120152004899197e-06, "epoch": 2.956499350286236, "total_flos": 2821474370550435840, "step": 734900 }, { "loss": 2.975, "learning_rate": 4.119340879503267e-06, "epoch": 2.9569016498304306, "total_flos": 2821843554998538240, "step": 735000 }, { "loss": 2.975, "learning_rate": 4.118529754107337e-06, "epoch": 2.9573039493746256, "total_flos": 2822214221283225600, "step": 735100 }, { "loss": 2.9275, "learning_rate": 4.117718628711406e-06, "epoch": 2.9577062489188197, "total_flos": 2822586475629342720, "step": 735200 }, { "loss": 2.96, "learning_rate": 4.116907503315475e-06, "epoch": 2.9581085484630147, "total_flos": 2822973075640750080, "step": 735300 }, { "loss": 2.955, "learning_rate": 4.116096377919545e-06, "epoch": 2.9585108480072093, "total_flos": 2823359271997747200, "step": 735400 }, { "loss": 2.935, "learning_rate": 4.115285252523614e-06, "epoch": 2.958913147551404, "total_flos": 2823726703735910400, "step": 735500 }, { "loss": 2.915, "learning_rate": 4.114474127127684e-06, "epoch": 2.9593154470955985, "total_flos": 2824105852074455040, "step": 735600 }, { "loss": 2.9825, "learning_rate": 4.113663001731753e-06, "epoch": 2.959717746639793, "total_flos": 2824485409378652160, "step": 735700 }, { "loss": 2.86, "learning_rate": 4.112851876335823e-06, "epoch": 2.9601200461839876, "total_flos": 2824865965196390400, "step": 735800 }, { "loss": 2.8325, "learning_rate": 4.112040750939892e-06, "epoch": 2.960522345728182, "total_flos": 2825243302401331200, "step": 735900 }, { "loss": 2.98, "learning_rate": 4.111229625543962e-06, "epoch": 2.9609246452723768, "total_flos": 2825643881602314240, "step": 736000 }, { "loss": 2.9425, "learning_rate": 4.11041850014803e-06, "epoch": 2.9613269448165713, "total_flos": 2826029860198379520, "step": 736100 }, { "loss": 2.9325, "learning_rate": 4.1096073747521e-06, "epoch": 2.9617292443607663, "total_flos": 2826418090761154560, "step": 736200 }, { "loss": 2.91, "learning_rate": 4.1087962493561695e-06, "epoch": 2.962131543904961, "total_flos": 2826797260344668160, "step": 736300 }, { "loss": 2.945, "learning_rate": 4.1079851239602396e-06, "epoch": 2.9625338434491555, "total_flos": 2827179011191910400, "step": 736400 }, { "loss": 2.9575, "learning_rate": 4.107173998564308e-06, "epoch": 2.96293614299335, "total_flos": 2827578947732582400, "step": 736500 }, { "loss": 2.94, "learning_rate": 4.106362873168378e-06, "epoch": 2.9633384425375446, "total_flos": 2827977758289899520, "step": 736600 }, { "loss": 2.9775, "learning_rate": 4.105551747772447e-06, "epoch": 2.963740742081739, "total_flos": 2828386091904552960, "step": 736700 }, { "loss": 2.895, "learning_rate": 4.104740622376517e-06, "epoch": 2.964143041625934, "total_flos": 2828769282098442240, "step": 736800 }, { "loss": 2.91, "learning_rate": 4.1039294969805855e-06, "epoch": 2.9645453411701284, "total_flos": 2829147352254812160, "step": 736900 }, { "loss": 2.975, "learning_rate": 4.103118371584656e-06, "epoch": 2.964947640714323, "total_flos": 2829528715381370880, "step": 737000 }, { "loss": 2.94, "learning_rate": 4.102307246188725e-06, "epoch": 2.965349940258518, "total_flos": 2829915469418803200, "step": 737100 }, { "loss": 2.945, "learning_rate": 4.101496120792795e-06, "epoch": 2.965752239802712, "total_flos": 2830286873966161920, "step": 737200 }, { "loss": 2.9275, "learning_rate": 4.100684995396864e-06, "epoch": 2.966154539346907, "total_flos": 2830666293178060800, "step": 737300 }, { "loss": 2.9, "learning_rate": 4.099873870000933e-06, "epoch": 2.9665568388911017, "total_flos": 2831066006646558720, "step": 737400 }, { "loss": 2.9325, "learning_rate": 4.099062744605002e-06, "epoch": 2.9669591384352962, "total_flos": 2831441649565224960, "step": 737500 }, { "loss": 2.955, "learning_rate": 4.0982516192090725e-06, "epoch": 2.967361437979491, "total_flos": 2831817504933580800, "step": 737600 }, { "loss": 2.95, "learning_rate": 4.097440493813142e-06, "epoch": 2.9677637375236854, "total_flos": 2832202957716664320, "step": 737700 }, { "loss": 2.915, "learning_rate": 4.096629368417211e-06, "epoch": 2.96816603706788, "total_flos": 2832598438125096960, "step": 737800 }, { "loss": 2.92, "learning_rate": 4.09581824302128e-06, "epoch": 2.9685683366120745, "total_flos": 2832982621521285120, "step": 737900 }, { "loss": 2.945, "learning_rate": 4.09500711762535e-06, "epoch": 2.968970636156269, "total_flos": 2833359401045790720, "step": 738000 }, { "loss": 2.9075, "learning_rate": 4.094195992229419e-06, "epoch": 2.9693729357004637, "total_flos": 2833746728697384960, "step": 738100 }, { "loss": 2.945, "learning_rate": 4.0933848668334885e-06, "epoch": 2.9697752352446587, "total_flos": 2834122870872821760, "step": 738200 }, { "loss": 2.94, "learning_rate": 4.092573741437558e-06, "epoch": 2.970177534788853, "total_flos": 2834485028547440640, "step": 738300 }, { "loss": 2.9175, "learning_rate": 4.091762616041628e-06, "epoch": 2.970579834333048, "total_flos": 2834868170940149760, "step": 738400 }, { "loss": 2.925, "learning_rate": 4.090951490645697e-06, "epoch": 2.9709821338772424, "total_flos": 2835246113626705920, "step": 738500 }, { "loss": 2.9525, "learning_rate": 4.090140365249766e-06, "epoch": 2.971384433421437, "total_flos": 2835617948384686080, "step": 738600 }, { "loss": 2.935, "learning_rate": 4.089329239853835e-06, "epoch": 2.9717867329656316, "total_flos": 2836004357191372800, "step": 738700 }, { "loss": 2.965, "learning_rate": 4.088518114457905e-06, "epoch": 2.972189032509826, "total_flos": 2836386660407808000, "step": 738800 }, { "loss": 2.92, "learning_rate": 4.0877069890619745e-06, "epoch": 2.9725913320540207, "total_flos": 2836776186913689600, "step": 738900 }, { "loss": 2.9375, "learning_rate": 4.086895863666044e-06, "epoch": 2.9729936315982153, "total_flos": 2837153433827512320, "step": 739000 }, { "loss": 2.9425, "learning_rate": 4.086084738270113e-06, "epoch": 2.97339593114241, "total_flos": 2837531424315248640, "step": 739100 }, { "loss": 2.925, "learning_rate": 4.085273612874183e-06, "epoch": 2.9737982306866044, "total_flos": 2837902749193973760, "step": 739200 }, { "loss": 2.9525, "learning_rate": 4.084462487478252e-06, "epoch": 2.9742005302307994, "total_flos": 2838286560803205120, "step": 739300 }, { "loss": 2.93, "learning_rate": 4.083651362082321e-06, "epoch": 2.974602829774994, "total_flos": 2838660514746839040, "step": 739400 }, { "loss": 2.9675, "learning_rate": 4.0828402366863906e-06, "epoch": 2.9750051293191886, "total_flos": 2839049504817254400, "step": 739500 }, { "loss": 2.94, "learning_rate": 4.082029111290461e-06, "epoch": 2.975407428863383, "total_flos": 2839431138817167360, "step": 739600 }, { "loss": 2.9825, "learning_rate": 4.08121798589453e-06, "epoch": 2.9758097284075777, "total_flos": 2839836200706600960, "step": 739700 }, { "loss": 2.9475, "learning_rate": 4.080406860498599e-06, "epoch": 2.9762120279517723, "total_flos": 2840231038454722560, "step": 739800 }, { "loss": 2.9075, "learning_rate": 4.079595735102668e-06, "epoch": 2.976614327495967, "total_flos": 2840629604694896640, "step": 739900 }, { "loss": 2.875, "learning_rate": 4.078784609706738e-06, "epoch": 2.9770166270401615, "total_flos": 2841019014353448960, "step": 740000 }, { "loss": 3.0075, "learning_rate": 4.0779734843108074e-06, "epoch": 2.977418926584356, "total_flos": 2841411297394053120, "step": 740100 }, { "loss": 2.9825, "learning_rate": 4.077162358914877e-06, "epoch": 2.977821226128551, "total_flos": 2841809459979816960, "step": 740200 }, { "loss": 2.94, "learning_rate": 4.076351233518947e-06, "epoch": 2.978223525672745, "total_flos": 2842198763413524480, "step": 740300 }, { "loss": 2.8975, "learning_rate": 4.075540108123016e-06, "epoch": 2.97862582521694, "total_flos": 2842570205139578880, "step": 740400 }, { "loss": 2.8975, "learning_rate": 4.074728982727085e-06, "epoch": 2.9790281247611348, "total_flos": 2842956417430302720, "step": 740500 }, { "loss": 2.8825, "learning_rate": 4.073917857331154e-06, "epoch": 2.9794304243053293, "total_flos": 2843324571497410560, "step": 740600 }, { "loss": 2.93, "learning_rate": 4.073106731935224e-06, "epoch": 2.979832723849524, "total_flos": 2843711888526520320, "step": 740700 }, { "loss": 2.9025, "learning_rate": 4.0722956065392935e-06, "epoch": 2.9802350233937185, "total_flos": 2844091764505251840, "step": 740800 }, { "loss": 2.8775, "learning_rate": 4.071484481143363e-06, "epoch": 2.980637322937913, "total_flos": 2844468459049881600, "step": 740900 }, { "loss": 2.8825, "learning_rate": 4.070673355747432e-06, "epoch": 2.9810396224821076, "total_flos": 2844869526885150720, "step": 741000 }, { "loss": 2.955, "learning_rate": 4.069862230351502e-06, "epoch": 2.981441922026302, "total_flos": 2845243507384995840, "step": 741100 }, { "loss": 2.955, "learning_rate": 4.069051104955571e-06, "epoch": 2.9818442215704968, "total_flos": 2845639418004049920, "step": 741200 }, { "loss": 2.9225, "learning_rate": 4.06823997955964e-06, "epoch": 2.982246521114692, "total_flos": 2846022751601479680, "step": 741300 }, { "loss": 2.93, "learning_rate": 4.0674288541637095e-06, "epoch": 2.982648820658886, "total_flos": 2846412235617423360, "step": 741400 }, { "loss": 2.92, "learning_rate": 4.0666177287677796e-06, "epoch": 2.983051120203081, "total_flos": 2846792074417459200, "step": 741500 }, { "loss": 2.9025, "learning_rate": 4.065806603371849e-06, "epoch": 2.9834534197472755, "total_flos": 2847172837373644800, "step": 741600 }, { "loss": 2.9125, "learning_rate": 4.064995477975918e-06, "epoch": 2.98385571929147, "total_flos": 2847547109991813120, "step": 741700 }, { "loss": 2.92, "learning_rate": 4.064184352579987e-06, "epoch": 2.9842580188356647, "total_flos": 2847922620129423360, "step": 741800 }, { "loss": 2.9225, "learning_rate": 4.063373227184057e-06, "epoch": 2.9846603183798592, "total_flos": 2848310962228285440, "step": 741900 }, { "loss": 2.935, "learning_rate": 4.062562101788126e-06, "epoch": 2.985062617924054, "total_flos": 2848699453041930240, "step": 742000 }, { "loss": 2.9225, "learning_rate": 4.061750976392196e-06, "epoch": 2.9854649174682484, "total_flos": 2849084528726814720, "step": 742100 }, { "loss": 2.8975, "learning_rate": 4.060939850996265e-06, "epoch": 2.9858672170124434, "total_flos": 2849474485443317760, "step": 742200 }, { "loss": 2.9675, "learning_rate": 4.060128725600335e-06, "epoch": 2.9862695165566375, "total_flos": 2849868935470755840, "step": 742300 }, { "loss": 2.96, "learning_rate": 4.059317600204404e-06, "epoch": 2.9866718161008325, "total_flos": 2850253761527255040, "step": 742400 }, { "loss": 2.935, "learning_rate": 4.058506474808473e-06, "epoch": 2.987074115645027, "total_flos": 2850640573988352000, "step": 742500 }, { "loss": 2.9275, "learning_rate": 4.057695349412542e-06, "epoch": 2.9874764151892217, "total_flos": 2851039565127905280, "step": 742600 }, { "loss": 2.9325, "learning_rate": 4.0568842240166125e-06, "epoch": 2.9878787147334163, "total_flos": 2851435300475965440, "step": 742700 }, { "loss": 2.8925, "learning_rate": 4.056073098620682e-06, "epoch": 2.988281014277611, "total_flos": 2851840776642293760, "step": 742800 }, { "loss": 2.9225, "learning_rate": 4.055261973224751e-06, "epoch": 2.9886833138218054, "total_flos": 2852219701908664320, "step": 742900 }, { "loss": 2.925, "learning_rate": 4.05445084782882e-06, "epoch": 2.989085613366, "total_flos": 2852584616118005760, "step": 743000 }, { "loss": 2.91, "learning_rate": 4.05363972243289e-06, "epoch": 2.9894879129101946, "total_flos": 2852973616810905600, "step": 743100 }, { "loss": 2.94, "learning_rate": 4.052828597036959e-06, "epoch": 2.989890212454389, "total_flos": 2853335886021611520, "step": 743200 }, { "loss": 2.8925, "learning_rate": 4.0520174716410285e-06, "epoch": 2.990292511998584, "total_flos": 2853721933663825920, "step": 743300 }, { "loss": 2.935, "learning_rate": 4.051206346245098e-06, "epoch": 2.9906948115427783, "total_flos": 2854089901837455360, "step": 743400 }, { "loss": 2.98, "learning_rate": 4.050395220849168e-06, "epoch": 2.9910971110869733, "total_flos": 2854478775060541440, "step": 743500 }, { "loss": 2.935, "learning_rate": 4.049584095453237e-06, "epoch": 2.991499410631168, "total_flos": 2854877383790653440, "step": 743600 }, { "loss": 2.905, "learning_rate": 4.048772970057306e-06, "epoch": 2.9919017101753624, "total_flos": 2855291118938664960, "step": 743700 }, { "loss": 2.96, "learning_rate": 4.047961844661375e-06, "epoch": 2.992304009719557, "total_flos": 2855662879339253760, "step": 743800 }, { "loss": 2.9125, "learning_rate": 4.047150719265445e-06, "epoch": 2.9927063092637516, "total_flos": 2856035744478228480, "step": 743900 }, { "loss": 2.9525, "learning_rate": 4.0463395938695145e-06, "epoch": 2.993108608807946, "total_flos": 2856416603036774400, "step": 744000 }, { "loss": 2.97, "learning_rate": 4.045528468473584e-06, "epoch": 2.9935109083521407, "total_flos": 2856796861424947200, "step": 744100 }, { "loss": 2.9625, "learning_rate": 4.044717343077653e-06, "epoch": 2.9939132078963353, "total_flos": 2857192049715056640, "step": 744200 }, { "loss": 2.9275, "learning_rate": 4.043906217681723e-06, "epoch": 2.99431550744053, "total_flos": 2857574751274659840, "step": 744300 }, { "loss": 2.91, "learning_rate": 4.043095092285792e-06, "epoch": 2.994717806984725, "total_flos": 2857956061288796160, "step": 744400 }, { "loss": 2.915, "learning_rate": 4.042283966889861e-06, "epoch": 2.9951201065289195, "total_flos": 2858338773470883840, "step": 744500 }, { "loss": 2.9425, "learning_rate": 4.0414728414939306e-06, "epoch": 2.995522406073114, "total_flos": 2858722595702599680, "step": 744600 }, { "loss": 2.9375, "learning_rate": 4.040661716098001e-06, "epoch": 2.9959247056173086, "total_flos": 2859099306180956160, "step": 744700 }, { "loss": 2.925, "learning_rate": 4.03985059070207e-06, "epoch": 2.996327005161503, "total_flos": 2859479904488632320, "step": 744800 }, { "loss": 2.955, "learning_rate": 4.039039465306139e-06, "epoch": 2.9967293047056978, "total_flos": 2859877812134768640, "step": 744900 }, { "loss": 2.98, "learning_rate": 4.038228339910209e-06, "epoch": 2.9971316042498923, "total_flos": 2860268284041768960, "step": 745000 }, { "loss": 2.9075, "learning_rate": 4.037417214514278e-06, "epoch": 2.997533903794087, "total_flos": 2860649015130501120, "step": 745100 }, { "loss": 2.97, "learning_rate": 4.0366060891183474e-06, "epoch": 2.9979362033382815, "total_flos": 2861028737083207680, "step": 745200 }, { "loss": 2.9275, "learning_rate": 4.035794963722417e-06, "epoch": 2.9983385028824765, "total_flos": 2861413828701818880, "step": 745300 }, { "loss": 2.9, "learning_rate": 4.034983838326487e-06, "epoch": 2.9987408024266706, "total_flos": 2861799308041113600, "step": 745400 }, { "loss": 2.93, "learning_rate": 4.034172712930556e-06, "epoch": 2.9991431019708656, "total_flos": 2862185584066744320, "step": 745500 }, { "loss": 2.9475, "learning_rate": 4.033361587534625e-06, "epoch": 2.99954540151506, "total_flos": 2862558093352488960, "step": 745600 }, { "loss": 2.96, "learning_rate": 4.032550462138694e-06, "epoch": 2.999947701059255, "total_flos": 2862913468570767360, "step": 745700 }, { "loss": 2.9025, "learning_rate": 4.031739336742764e-06, "epoch": 3.0003500006034494, "total_flos": 2863293818577868800, "step": 745800 }, { "loss": 2.855, "learning_rate": 4.0309282113468335e-06, "epoch": 3.000752300147644, "total_flos": 2863669440251566080, "step": 745900 }, { "loss": 2.9175, "learning_rate": 4.0301170859509035e-06, "epoch": 3.0011545996918385, "total_flos": 2864075484720814080, "step": 746000 }, { "loss": 2.895, "learning_rate": 4.029305960554972e-06, "epoch": 3.001556899236033, "total_flos": 2864450145059665920, "step": 746100 }, { "loss": 2.935, "learning_rate": 4.028494835159042e-06, "epoch": 3.0019591987802277, "total_flos": 2864829718297589760, "step": 746200 }, { "loss": 3.0125, "learning_rate": 4.027683709763111e-06, "epoch": 3.0023614983244222, "total_flos": 2865200241178736640, "step": 746300 }, { "loss": 2.955, "learning_rate": 4.026872584367181e-06, "epoch": 3.002763797868617, "total_flos": 2865594855854684160, "step": 746400 }, { "loss": 2.94, "learning_rate": 4.0260614589712495e-06, "epoch": 3.003166097412812, "total_flos": 2865985354317895680, "step": 746500 }, { "loss": 2.915, "learning_rate": 4.0252503335753196e-06, "epoch": 3.0035683969570064, "total_flos": 2866381896974776320, "step": 746600 }, { "loss": 2.93, "learning_rate": 4.024439208179389e-06, "epoch": 3.003970696501201, "total_flos": 2866762165985433600, "step": 746700 }, { "loss": 2.905, "learning_rate": 4.023628082783459e-06, "epoch": 3.0043729960453955, "total_flos": 2867153137149204480, "step": 746800 }, { "loss": 2.91, "learning_rate": 4.022816957387527e-06, "epoch": 3.00477529558959, "total_flos": 2867546487749498880, "step": 746900 }, { "loss": 2.92, "learning_rate": 4.022005831991597e-06, "epoch": 3.0051775951337847, "total_flos": 2867941049313024000, "step": 747000 }, { "loss": 2.9275, "learning_rate": 4.021194706595666e-06, "epoch": 3.0055798946779793, "total_flos": 2868312230788208640, "step": 747100 }, { "loss": 2.915, "learning_rate": 4.0203835811997364e-06, "epoch": 3.005982194222174, "total_flos": 2868685632362649600, "step": 747200 }, { "loss": 2.9775, "learning_rate": 4.019572455803805e-06, "epoch": 3.0063844937663684, "total_flos": 2869099277219543040, "step": 747300 }, { "loss": 2.9225, "learning_rate": 4.018761330407875e-06, "epoch": 3.006786793310563, "total_flos": 2869489807550208000, "step": 747400 }, { "loss": 2.965, "learning_rate": 4.017950205011944e-06, "epoch": 3.007189092854758, "total_flos": 2869877591968634880, "step": 747500 }, { "loss": 2.9375, "learning_rate": 4.017139079616014e-06, "epoch": 3.0075913923989526, "total_flos": 2870276774312908800, "step": 747600 }, { "loss": 2.9075, "learning_rate": 4.016327954220082e-06, "epoch": 3.007993691943147, "total_flos": 2870649071148963840, "step": 747700 }, { "loss": 2.8825, "learning_rate": 4.0155168288241525e-06, "epoch": 3.0083959914873417, "total_flos": 2871034014052792320, "step": 747800 }, { "loss": 2.9625, "learning_rate": 4.014705703428222e-06, "epoch": 3.0087982910315363, "total_flos": 2871410926358353920, "step": 747900 }, { "loss": 2.9125, "learning_rate": 4.013894578032292e-06, "epoch": 3.009200590575731, "total_flos": 2871793521693112320, "step": 748000 }, { "loss": 2.9425, "learning_rate": 4.01308345263636e-06, "epoch": 3.0096028901199254, "total_flos": 2872174061577123840, "step": 748100 }, { "loss": 2.905, "learning_rate": 4.01227232724043e-06, "epoch": 3.01000518966412, "total_flos": 2872562499278346240, "step": 748200 }, { "loss": 2.9, "learning_rate": 4.011461201844499e-06, "epoch": 3.0104074892083146, "total_flos": 2872935491887134720, "step": 748300 }, { "loss": 2.8525, "learning_rate": 4.010650076448569e-06, "epoch": 3.010809788752509, "total_flos": 2873317598587607040, "step": 748400 }, { "loss": 2.875, "learning_rate": 4.0098389510526385e-06, "epoch": 3.011212088296704, "total_flos": 2873698924535470080, "step": 748500 }, { "loss": 2.9175, "learning_rate": 4.009027825656708e-06, "epoch": 3.0116143878408987, "total_flos": 2874085821976442880, "step": 748600 }, { "loss": 2.965, "learning_rate": 4.008216700260777e-06, "epoch": 3.0120166873850933, "total_flos": 2874443693478574080, "step": 748700 }, { "loss": 2.9525, "learning_rate": 4.007405574864847e-06, "epoch": 3.012418986929288, "total_flos": 2874827430730414080, "step": 748800 }, { "loss": 2.9375, "learning_rate": 4.006594449468916e-06, "epoch": 3.0128212864734825, "total_flos": 2875210817440266240, "step": 748900 }, { "loss": 2.9125, "learning_rate": 4.005783324072985e-06, "epoch": 3.013223586017677, "total_flos": 2875608156783482880, "step": 749000 }, { "loss": 2.92, "learning_rate": 4.0049721986770545e-06, "epoch": 3.0136258855618716, "total_flos": 2875973607428290560, "step": 749100 }, { "loss": 2.905, "learning_rate": 4.004161073281125e-06, "epoch": 3.014028185106066, "total_flos": 2876360781053859840, "step": 749200 }, { "loss": 2.9075, "learning_rate": 4.003349947885194e-06, "epoch": 3.0144304846502608, "total_flos": 2876752054958438400, "step": 749300 }, { "loss": 2.9475, "learning_rate": 4.002538822489263e-06, "epoch": 3.0148327841944553, "total_flos": 2877127087084247040, "step": 749400 }, { "loss": 2.915, "learning_rate": 4.001727697093332e-06, "epoch": 3.0152350837386503, "total_flos": 2877498231380736000, "step": 749500 }, { "loss": 2.93, "learning_rate": 4.000916571697402e-06, "epoch": 3.015637383282845, "total_flos": 2877879647619717120, "step": 749600 }, { "loss": 2.9475, "learning_rate": 4.000105446301471e-06, "epoch": 3.0160396828270395, "total_flos": 2878265562480875520, "step": 749700 }, { "loss": 2.9575, "learning_rate": 3.999294320905541e-06, "epoch": 3.016441982371234, "total_flos": 2878642697858611200, "step": 749800 }, { "loss": 2.94, "learning_rate": 3.998483195509611e-06, "epoch": 3.0168442819154286, "total_flos": 2879044254328166400, "step": 749900 }, { "loss": 2.9625, "learning_rate": 3.99767207011368e-06, "epoch": 3.017246581459623, "total_flos": 2879442873680762880, "step": 750000 }, { "loss": 2.9225, "learning_rate": 3.996860944717749e-06, "epoch": 3.017648881003818, "total_flos": 2879833515547514880, "step": 750100 }, { "loss": 2.95, "learning_rate": 3.996049819321818e-06, "epoch": 3.0180511805480124, "total_flos": 2880198573160396800, "step": 750200 }, { "loss": 2.9725, "learning_rate": 3.995238693925888e-06, "epoch": 3.018453480092207, "total_flos": 2880593660536903680, "step": 750300 }, { "loss": 2.9775, "learning_rate": 3.9944275685299575e-06, "epoch": 3.0188557796364015, "total_flos": 2880988726668441600, "step": 750400 }, { "loss": 2.9175, "learning_rate": 3.993616443134027e-06, "epoch": 3.019258079180596, "total_flos": 2881395089812224000, "step": 750500 }, { "loss": 2.9525, "learning_rate": 3.992805317738096e-06, "epoch": 3.019660378724791, "total_flos": 2881790288724817920, "step": 750600 }, { "loss": 2.945, "learning_rate": 3.991994192342166e-06, "epoch": 3.0200626782689857, "total_flos": 2882170966701127680, "step": 750700 }, { "loss": 2.9475, "learning_rate": 3.991183066946235e-06, "epoch": 3.0204649778131802, "total_flos": 2882536454524631040, "step": 750800 }, { "loss": 2.9675, "learning_rate": 3.990371941550304e-06, "epoch": 3.020867277357375, "total_flos": 2882909829542860800, "step": 750900 }, { "loss": 2.97, "learning_rate": 3.9895608161543735e-06, "epoch": 3.0212695769015694, "total_flos": 2883287092390410240, "step": 751000 }, { "loss": 2.9225, "learning_rate": 3.9887496907584435e-06, "epoch": 3.021671876445764, "total_flos": 2883671206740449280, "step": 751100 }, { "loss": 2.9125, "learning_rate": 3.987938565362513e-06, "epoch": 3.0220741759899585, "total_flos": 2884060207433349120, "step": 751200 }, { "loss": 2.94, "learning_rate": 3.987127439966582e-06, "epoch": 3.022476475534153, "total_flos": 2884448666379540480, "step": 751300 }, { "loss": 2.925, "learning_rate": 3.986316314570651e-06, "epoch": 3.0228787750783477, "total_flos": 2884847684075304960, "step": 751400 }, { "loss": 2.9575, "learning_rate": 3.985505189174721e-06, "epoch": 3.0232810746225423, "total_flos": 2885230539660933120, "step": 751500 }, { "loss": 2.8725, "learning_rate": 3.98469406377879e-06, "epoch": 3.0236833741667373, "total_flos": 2885609560529664000, "step": 751600 }, { "loss": 2.915, "learning_rate": 3.9838829383828596e-06, "epoch": 3.024085673710932, "total_flos": 2885985819552430080, "step": 751700 }, { "loss": 2.9475, "learning_rate": 3.983071812986929e-06, "epoch": 3.0244879732551264, "total_flos": 2886366109808056320, "step": 751800 }, { "loss": 2.885, "learning_rate": 3.982260687590999e-06, "epoch": 3.024890272799321, "total_flos": 2886741805839144960, "step": 751900 }, { "loss": 2.955, "learning_rate": 3.981449562195068e-06, "epoch": 3.0252925723435156, "total_flos": 2887116375886878720, "step": 752000 }, { "loss": 2.935, "learning_rate": 3.980638436799137e-06, "epoch": 3.02569487188771, "total_flos": 2887498397607475200, "step": 752100 }, { "loss": 2.9575, "learning_rate": 3.979827311403206e-06, "epoch": 3.0260971714319047, "total_flos": 2887885332227143680, "step": 752200 }, { "loss": 2.925, "learning_rate": 3.9790161860072764e-06, "epoch": 3.0264994709760993, "total_flos": 2888261161039288320, "step": 752300 }, { "loss": 2.92, "learning_rate": 3.978205060611346e-06, "epoch": 3.026901770520294, "total_flos": 2888648281552435200, "step": 752400 }, { "loss": 2.9675, "learning_rate": 3.977393935215415e-06, "epoch": 3.0273040700644884, "total_flos": 2889049524658698240, "step": 752500 }, { "loss": 2.94, "learning_rate": 3.976582809819484e-06, "epoch": 3.0277063696086834, "total_flos": 2889425714635315200, "step": 752600 }, { "loss": 2.9075, "learning_rate": 3.975771684423554e-06, "epoch": 3.028108669152878, "total_flos": 2889818985566976000, "step": 752700 }, { "loss": 2.9475, "learning_rate": 3.974960559027623e-06, "epoch": 3.0285109686970726, "total_flos": 2890213005383792640, "step": 752800 }, { "loss": 2.9575, "learning_rate": 3.9741494336316925e-06, "epoch": 3.028913268241267, "total_flos": 2890609542729431040, "step": 752900 }, { "loss": 2.935, "learning_rate": 3.973338308235762e-06, "epoch": 3.0293155677854617, "total_flos": 2891005947294013440, "step": 753000 }, { "loss": 2.8975, "learning_rate": 3.972527182839832e-06, "epoch": 3.0297178673296563, "total_flos": 2891389525208586240, "step": 753100 }, { "loss": 2.92, "learning_rate": 3.971716057443901e-06, "epoch": 3.030120166873851, "total_flos": 2891761949514455040, "step": 753200 }, { "loss": 2.9125, "learning_rate": 3.97090493204797e-06, "epoch": 3.0305224664180455, "total_flos": 2892137486208276480, "step": 753300 }, { "loss": 2.9375, "learning_rate": 3.970093806652039e-06, "epoch": 3.03092476596224, "total_flos": 2892524627966392320, "step": 753400 }, { "loss": 2.9, "learning_rate": 3.969282681256109e-06, "epoch": 3.0313270655064346, "total_flos": 2892915450415380480, "step": 753500 }, { "loss": 2.9075, "learning_rate": 3.9684715558601785e-06, "epoch": 3.031729365050629, "total_flos": 2893301524613806080, "step": 753600 }, { "loss": 2.94, "learning_rate": 3.967660430464248e-06, "epoch": 3.032131664594824, "total_flos": 2893703665320007680, "step": 753700 }, { "loss": 2.9225, "learning_rate": 3.966849305068317e-06, "epoch": 3.0325339641390188, "total_flos": 2894080375798364160, "step": 753800 }, { "loss": 2.8475, "learning_rate": 3.966038179672387e-06, "epoch": 3.0329362636832133, "total_flos": 2894460708543928320, "step": 753900 }, { "loss": 2.8875, "learning_rate": 3.965227054276456e-06, "epoch": 3.033338563227408, "total_flos": 2894864952502056960, "step": 754000 }, { "loss": 2.9525, "learning_rate": 3.964415928880525e-06, "epoch": 3.0337408627716025, "total_flos": 2895239511927306240, "step": 754100 }, { "loss": 2.9, "learning_rate": 3.9636048034845945e-06, "epoch": 3.034143162315797, "total_flos": 2895621698296412160, "step": 754200 }, { "loss": 2.905, "learning_rate": 3.962793678088665e-06, "epoch": 3.0345454618599916, "total_flos": 2896025745738577920, "step": 754300 }, { "loss": 2.8875, "learning_rate": 3.961982552692734e-06, "epoch": 3.034947761404186, "total_flos": 2896418204050176000, "step": 754400 }, { "loss": 2.94, "learning_rate": 3.961171427296803e-06, "epoch": 3.035350060948381, "total_flos": 2896813217069291520, "step": 754500 }, { "loss": 2.92, "learning_rate": 3.960360301900873e-06, "epoch": 3.0357523604925754, "total_flos": 2897198728276039680, "step": 754600 }, { "loss": 2.86, "learning_rate": 3.959549176504942e-06, "epoch": 3.0361546600367704, "total_flos": 2897582327435581440, "step": 754700 }, { "loss": 2.9025, "learning_rate": 3.958738051109011e-06, "epoch": 3.036556959580965, "total_flos": 2897976804019230720, "step": 754800 }, { "loss": 2.9225, "learning_rate": 3.957926925713081e-06, "epoch": 3.0369592591251595, "total_flos": 2898354582057277440, "step": 754900 }, { "loss": 2.88, "learning_rate": 3.957115800317151e-06, "epoch": 3.037361558669354, "total_flos": 2898734219030108160, "step": 755000 }, { "loss": 2.9525, "learning_rate": 3.95630467492122e-06, "epoch": 3.0377638582135487, "total_flos": 2899122736399964160, "step": 755100 }, { "loss": 2.9075, "learning_rate": 3.955493549525289e-06, "epoch": 3.0381661577577432, "total_flos": 2899484782538496000, "step": 755200 }, { "loss": 2.89, "learning_rate": 3.954682424129358e-06, "epoch": 3.038568457301938, "total_flos": 2899879944272394240, "step": 755300 }, { "loss": 2.9125, "learning_rate": 3.953871298733428e-06, "epoch": 3.0389707568461324, "total_flos": 2900276157632256000, "step": 755400 }, { "loss": 2.9575, "learning_rate": 3.9530601733374975e-06, "epoch": 3.039373056390327, "total_flos": 2900658492716144640, "step": 755500 }, { "loss": 2.9475, "learning_rate": 3.952249047941567e-06, "epoch": 3.0397753559345215, "total_flos": 2901071680806205440, "step": 755600 }, { "loss": 2.9025, "learning_rate": 3.951437922545636e-06, "epoch": 3.0401776554787165, "total_flos": 2901461047974819840, "step": 755700 }, { "loss": 2.9125, "learning_rate": 3.950626797149706e-06, "epoch": 3.040579955022911, "total_flos": 2901853665623685120, "step": 755800 }, { "loss": 2.96, "learning_rate": 3.949815671753775e-06, "epoch": 3.0409822545671057, "total_flos": 2902233987746764800, "step": 755900 }, { "loss": 2.97, "learning_rate": 3.949004546357844e-06, "epoch": 3.0413845541113003, "total_flos": 2902621904946247680, "step": 756000 }, { "loss": 2.9175, "learning_rate": 3.9481934209619135e-06, "epoch": 3.041786853655495, "total_flos": 2903011845729024000, "step": 756100 }, { "loss": 2.8875, "learning_rate": 3.9473822955659835e-06, "epoch": 3.0421891531996894, "total_flos": 2903380674323896320, "step": 756200 }, { "loss": 2.98, "learning_rate": 3.946571170170053e-06, "epoch": 3.042591452743884, "total_flos": 2903766822879713280, "step": 756300 }, { "loss": 2.9425, "learning_rate": 3.945760044774122e-06, "epoch": 3.0429937522880786, "total_flos": 2904152466867517440, "step": 756400 }, { "loss": 2.95, "learning_rate": 3.944948919378191e-06, "epoch": 3.043396051832273, "total_flos": 2904537919650600960, "step": 756500 }, { "loss": 2.8575, "learning_rate": 3.944137793982261e-06, "epoch": 3.0437983513764677, "total_flos": 2904922814753249280, "step": 756600 }, { "loss": 2.935, "learning_rate": 3.94332666858633e-06, "epoch": 3.0442006509206627, "total_flos": 2905304215058503680, "step": 756700 }, { "loss": 2.9225, "learning_rate": 3.9425155431903996e-06, "epoch": 3.0446029504648573, "total_flos": 2905690475150407680, "step": 756800 }, { "loss": 2.975, "learning_rate": 3.941704417794469e-06, "epoch": 3.045005250009052, "total_flos": 2906087995075860480, "step": 756900 }, { "loss": 2.9475, "learning_rate": 3.940893292398539e-06, "epoch": 3.0454075495532464, "total_flos": 2906464933937633280, "step": 757000 }, { "loss": 2.9725, "learning_rate": 3.940082167002608e-06, "epoch": 3.045809849097441, "total_flos": 2906858125200660480, "step": 757100 }, { "loss": 2.97, "learning_rate": 3.939271041606678e-06, "epoch": 3.0462121486416356, "total_flos": 2907218986932172800, "step": 757200 }, { "loss": 2.9725, "learning_rate": 3.938459916210746e-06, "epoch": 3.04661444818583, "total_flos": 2907604923038300160, "step": 757300 }, { "loss": 2.91, "learning_rate": 3.9376487908148164e-06, "epoch": 3.0470167477300247, "total_flos": 2907999527091763200, "step": 757400 }, { "loss": 2.8875, "learning_rate": 3.936837665418886e-06, "epoch": 3.0474190472742193, "total_flos": 2908373677551360000, "step": 757500 }, { "loss": 2.915, "learning_rate": 3.936026540022956e-06, "epoch": 3.047821346818414, "total_flos": 2908755470888540160, "step": 757600 }, { "loss": 2.95, "learning_rate": 3.935215414627024e-06, "epoch": 3.048223646362609, "total_flos": 2909136414426961920, "step": 757700 }, { "loss": 2.92, "learning_rate": 3.934404289231094e-06, "epoch": 3.0486259459068035, "total_flos": 2909521628204144640, "step": 757800 }, { "loss": 2.9125, "learning_rate": 3.933593163835163e-06, "epoch": 3.049028245450998, "total_flos": 2909911770814126080, "step": 757900 }, { "loss": 2.92, "learning_rate": 3.932782038439233e-06, "epoch": 3.0494305449951926, "total_flos": 2910296086991370240, "step": 758000 }, { "loss": 2.935, "learning_rate": 3.931970913043302e-06, "epoch": 3.049832844539387, "total_flos": 2910671740532520960, "step": 758100 }, { "loss": 2.8825, "learning_rate": 3.931159787647372e-06, "epoch": 3.0502351440835818, "total_flos": 2911063954526976000, "step": 758200 }, { "loss": 2.9525, "learning_rate": 3.930348662251441e-06, "epoch": 3.0506374436277763, "total_flos": 2911447091608442880, "step": 758300 }, { "loss": 2.9325, "learning_rate": 3.929537536855511e-06, "epoch": 3.051039743171971, "total_flos": 2911843570530416640, "step": 758400 }, { "loss": 2.9, "learning_rate": 3.928726411459579e-06, "epoch": 3.0514420427161655, "total_flos": 2912254405740165120, "step": 758500 }, { "loss": 2.8825, "learning_rate": 3.927915286063649e-06, "epoch": 3.05184434226036, "total_flos": 2912655234569533440, "step": 758600 }, { "loss": 2.915, "learning_rate": 3.9271041606677185e-06, "epoch": 3.0522466418045546, "total_flos": 2913030367608944640, "step": 758700 }, { "loss": 2.8875, "learning_rate": 3.9262930352717886e-06, "epoch": 3.0526489413487496, "total_flos": 2913402382949160960, "step": 758800 }, { "loss": 2.9725, "learning_rate": 3.925481909875857e-06, "epoch": 3.053051240892944, "total_flos": 2913771631132170240, "step": 758900 }, { "loss": 2.98, "learning_rate": 3.924670784479927e-06, "epoch": 3.053453540437139, "total_flos": 2914162198641530880, "step": 759000 }, { "loss": 2.915, "learning_rate": 3.923859659083997e-06, "epoch": 3.0538558399813334, "total_flos": 2914541766568212480, "step": 759100 }, { "loss": 2.8775, "learning_rate": 3.923048533688066e-06, "epoch": 3.054258139525528, "total_flos": 2914925944653158400, "step": 759200 }, { "loss": 2.9375, "learning_rate": 3.922237408292135e-06, "epoch": 3.0546604390697225, "total_flos": 2915305836565616640, "step": 759300 }, { "loss": 2.93, "learning_rate": 3.921426282896205e-06, "epoch": 3.055062738613917, "total_flos": 2915685834702919680, "step": 759400 }, { "loss": 2.885, "learning_rate": 3.920615157500275e-06, "epoch": 3.0554650381581117, "total_flos": 2916063070994257920, "step": 759500 }, { "loss": 2.96, "learning_rate": 3.919804032104344e-06, "epoch": 3.0558673377023062, "total_flos": 2916446792312371200, "step": 759600 }, { "loss": 2.985, "learning_rate": 3.918992906708413e-06, "epoch": 3.056269637246501, "total_flos": 2916830359604459520, "step": 759700 }, { "loss": 2.9, "learning_rate": 3.918181781312482e-06, "epoch": 3.056671936790696, "total_flos": 2917214314617231360, "step": 759800 }, { "loss": 2.9625, "learning_rate": 3.917370655916552e-06, "epoch": 3.0570742363348904, "total_flos": 2917598492702177280, "step": 759900 }, { "loss": 2.9425, "learning_rate": 3.9165595305206215e-06, "epoch": 3.057476535879085, "total_flos": 2917990642961725440, "step": 760000 }, { "loss": 2.935, "learning_rate": 3.915748405124691e-06, "epoch": 3.0578788354232795, "total_flos": 2918378703564748800, "step": 760100 }, { "loss": 2.9475, "learning_rate": 3.91493727972876e-06, "epoch": 3.058281134967474, "total_flos": 2918760502213171200, "step": 760200 }, { "loss": 2.8975, "learning_rate": 3.91412615433283e-06, "epoch": 3.0586834345116687, "total_flos": 2919126441492264960, "step": 760300 }, { "loss": 2.8525, "learning_rate": 3.913315028936899e-06, "epoch": 3.0590857340558633, "total_flos": 2919520222303180800, "step": 760400 }, { "loss": 2.9025, "learning_rate": 3.912503903540968e-06, "epoch": 3.059488033600058, "total_flos": 2919904538480424960, "step": 760500 }, { "loss": 2.87, "learning_rate": 3.9116927781450375e-06, "epoch": 3.0598903331442524, "total_flos": 2920277228348405760, "step": 760600 }, { "loss": 2.8675, "learning_rate": 3.9108816527491075e-06, "epoch": 3.060292632688447, "total_flos": 2920658920771983360, "step": 760700 }, { "loss": 2.965, "learning_rate": 3.910070527353177e-06, "epoch": 3.060694932232642, "total_flos": 2921029597679155200, "step": 760800 }, { "loss": 2.9425, "learning_rate": 3.909259401957246e-06, "epoch": 3.0610972317768366, "total_flos": 2921410164119377920, "step": 760900 }, { "loss": 2.9575, "learning_rate": 3.908448276561315e-06, "epoch": 3.061499531321031, "total_flos": 2921774239152445440, "step": 761000 }, { "loss": 2.91, "learning_rate": 3.907637151165385e-06, "epoch": 3.0619018308652257, "total_flos": 2922147582303221760, "step": 761100 }, { "loss": 2.89, "learning_rate": 3.906826025769454e-06, "epoch": 3.0623041304094203, "total_flos": 2922544725130475520, "step": 761200 }, { "loss": 2.9025, "learning_rate": 3.9060149003735235e-06, "epoch": 3.062706429953615, "total_flos": 2922919029616097280, "step": 761300 }, { "loss": 2.9075, "learning_rate": 3.905203774977593e-06, "epoch": 3.0631087294978094, "total_flos": 2923299351739176960, "step": 761400 }, { "loss": 2.93, "learning_rate": 3.904392649581663e-06, "epoch": 3.063511029042004, "total_flos": 2923679987225548800, "step": 761500 }, { "loss": 2.9325, "learning_rate": 3.903581524185732e-06, "epoch": 3.0639133285861986, "total_flos": 2924068196543354880, "step": 761600 }, { "loss": 2.9225, "learning_rate": 3.902770398789801e-06, "epoch": 3.064315628130393, "total_flos": 2924446362302085120, "step": 761700 }, { "loss": 2.9575, "learning_rate": 3.90195927339387e-06, "epoch": 3.0647179276745877, "total_flos": 2924829706521999360, "step": 761800 }, { "loss": 2.91, "learning_rate": 3.90114814799794e-06, "epoch": 3.0651202272187827, "total_flos": 2925213815560796160, "step": 761900 }, { "loss": 2.9425, "learning_rate": 3.90033702260201e-06, "epoch": 3.0655225267629773, "total_flos": 2925594286398658560, "step": 762000 }, { "loss": 2.935, "learning_rate": 3.899525897206079e-06, "epoch": 3.065924826307172, "total_flos": 2925972117549127680, "step": 762100 }, { "loss": 2.935, "learning_rate": 3.898714771810148e-06, "epoch": 3.0663271258513665, "total_flos": 2926352992041400320, "step": 762200 }, { "loss": 2.915, "learning_rate": 3.897903646414218e-06, "epoch": 3.066729425395561, "total_flos": 2926743602040698880, "step": 762300 }, { "loss": 2.9025, "learning_rate": 3.897092521018287e-06, "epoch": 3.0671317249397556, "total_flos": 2927128529010800640, "step": 762400 }, { "loss": 2.95, "learning_rate": 3.8962813956223564e-06, "epoch": 3.06753402448395, "total_flos": 2927517221651650560, "step": 762500 }, { "loss": 2.9125, "learning_rate": 3.895470270226426e-06, "epoch": 3.0679363240281448, "total_flos": 2927911485785610240, "step": 762600 }, { "loss": 2.9075, "learning_rate": 3.894659144830496e-06, "epoch": 3.0683386235723393, "total_flos": 2928306690009446400, "step": 762700 }, { "loss": 2.9025, "learning_rate": 3.893848019434565e-06, "epoch": 3.068740923116534, "total_flos": 2928682104544696320, "step": 762800 }, { "loss": 2.91, "learning_rate": 3.893036894038634e-06, "epoch": 3.069143222660729, "total_flos": 2929083469809530880, "step": 762900 }, { "loss": 2.935, "learning_rate": 3.892225768642703e-06, "epoch": 3.0695455222049235, "total_flos": 2929451623876638720, "step": 763000 }, { "loss": 2.9725, "learning_rate": 3.891414643246773e-06, "epoch": 3.069947821749118, "total_flos": 2929840550212147200, "step": 763100 }, { "loss": 2.8825, "learning_rate": 3.8906035178508425e-06, "epoch": 3.0703501212933126, "total_flos": 2930247757843445760, "step": 763200 }, { "loss": 2.945, "learning_rate": 3.889792392454912e-06, "epoch": 3.070752420837507, "total_flos": 2930637050654668800, "step": 763300 }, { "loss": 2.8875, "learning_rate": 3.888981267058981e-06, "epoch": 3.071154720381702, "total_flos": 2931038654925404160, "step": 763400 }, { "loss": 2.9175, "learning_rate": 3.888170141663051e-06, "epoch": 3.0715570199258964, "total_flos": 2931419247921838080, "step": 763500 }, { "loss": 2.9625, "learning_rate": 3.88735901626712e-06, "epoch": 3.071959319470091, "total_flos": 2931790169146152960, "step": 763600 }, { "loss": 2.9425, "learning_rate": 3.886547890871189e-06, "epoch": 3.0723616190142855, "total_flos": 2932185835448064000, "step": 763700 }, { "loss": 2.9325, "learning_rate": 3.885736765475259e-06, "epoch": 3.07276391855848, "total_flos": 2932582319681280000, "step": 763800 }, { "loss": 2.92, "learning_rate": 3.8849256400793286e-06, "epoch": 3.073166218102675, "total_flos": 2932970539621570560, "step": 763900 }, { "loss": 2.9325, "learning_rate": 3.884114514683398e-06, "epoch": 3.0735685176468697, "total_flos": 2933349788873717760, "step": 764000 }, { "loss": 2.91, "learning_rate": 3.883303389287467e-06, "epoch": 3.0739708171910642, "total_flos": 2933743856491714560, "step": 764100 }, { "loss": 2.9275, "learning_rate": 3.882492263891537e-06, "epoch": 3.074373116735259, "total_flos": 2934133255527782400, "step": 764200 }, { "loss": 2.915, "learning_rate": 3.881681138495606e-06, "epoch": 3.0747754162794534, "total_flos": 2934510104098437120, "step": 764300 }, { "loss": 2.95, "learning_rate": 3.880870013099675e-06, "epoch": 3.075177715823648, "total_flos": 2934890760829777920, "step": 764400 }, { "loss": 2.95, "learning_rate": 3.880058887703745e-06, "epoch": 3.0755800153678425, "total_flos": 2935271401627392000, "step": 764500 }, { "loss": 2.9425, "learning_rate": 3.879247762307815e-06, "epoch": 3.075982314912037, "total_flos": 2935654225345566720, "step": 764600 }, { "loss": 2.9425, "learning_rate": 3.878436636911884e-06, "epoch": 3.0763846144562317, "total_flos": 2936034111946782720, "step": 764700 }, { "loss": 2.9075, "learning_rate": 3.877625511515953e-06, "epoch": 3.0767869140004263, "total_flos": 2936423803101173760, "step": 764800 }, { "loss": 2.91, "learning_rate": 3.876814386120022e-06, "epoch": 3.077189213544621, "total_flos": 2936811879637923840, "step": 764900 }, { "loss": 2.945, "learning_rate": 3.876003260724092e-06, "epoch": 3.077591513088816, "total_flos": 2937193354300569600, "step": 765000 }, { "loss": 2.9125, "learning_rate": 3.8751921353281615e-06, "epoch": 3.0779938126330104, "total_flos": 2937582397483407360, "step": 765100 }, { "loss": 2.96, "learning_rate": 3.874381009932231e-06, "epoch": 3.078396112177205, "total_flos": 2937958768042260480, "step": 765200 }, { "loss": 2.905, "learning_rate": 3.8735698845363e-06, "epoch": 3.0787984117213996, "total_flos": 2938342574340249600, "step": 765300 }, { "loss": 2.88, "learning_rate": 3.87275875914037e-06, "epoch": 3.079200711265594, "total_flos": 2938713708014254080, "step": 765400 }, { "loss": 2.8925, "learning_rate": 3.871947633744439e-06, "epoch": 3.0796030108097887, "total_flos": 2939094141673420800, "step": 765500 }, { "loss": 2.8975, "learning_rate": 3.871136508348508e-06, "epoch": 3.0800053103539833, "total_flos": 2939472928847493120, "step": 765600 }, { "loss": 2.9025, "learning_rate": 3.8703253829525775e-06, "epoch": 3.080407609898178, "total_flos": 2939849979245352960, "step": 765700 }, { "loss": 2.9325, "learning_rate": 3.8695142575566475e-06, "epoch": 3.0808099094423724, "total_flos": 2940243048349808640, "step": 765800 }, { "loss": 2.9425, "learning_rate": 3.868703132160717e-06, "epoch": 3.0812122089865674, "total_flos": 2940618760314624000, "step": 765900 }, { "loss": 2.925, "learning_rate": 3.867892006764786e-06, "epoch": 3.081614508530762, "total_flos": 2941008377111623680, "step": 766000 }, { "loss": 2.9275, "learning_rate": 3.867080881368855e-06, "epoch": 3.0820168080749566, "total_flos": 2941394525667440640, "step": 766100 }, { "loss": 2.9175, "learning_rate": 3.866269755972925e-06, "epoch": 3.082419107619151, "total_flos": 2941774321977538560, "step": 766200 }, { "loss": 2.8925, "learning_rate": 3.865458630576994e-06, "epoch": 3.0828214071633457, "total_flos": 2942158192010434560, "step": 766300 }, { "loss": 2.94, "learning_rate": 3.8646475051810635e-06, "epoch": 3.0832237067075403, "total_flos": 2942551112400107520, "step": 766400 }, { "loss": 2.95, "learning_rate": 3.863836379785133e-06, "epoch": 3.083626006251735, "total_flos": 2942938700302571520, "step": 766500 }, { "loss": 2.845, "learning_rate": 3.863025254389203e-06, "epoch": 3.0840283057959295, "total_flos": 2943328694197770240, "step": 766600 }, { "loss": 2.93, "learning_rate": 3.862214128993272e-06, "epoch": 3.084430605340124, "total_flos": 2943704900108113920, "step": 766700 }, { "loss": 2.905, "learning_rate": 3.861403003597341e-06, "epoch": 3.0848329048843186, "total_flos": 2944090607830824960, "step": 766800 }, { "loss": 2.9475, "learning_rate": 3.86059187820141e-06, "epoch": 3.085235204428513, "total_flos": 2944458236084951040, "step": 766900 }, { "loss": 2.95, "learning_rate": 3.85978075280548e-06, "epoch": 3.085637503972708, "total_flos": 2944845122903439360, "step": 767000 }, { "loss": 2.9275, "learning_rate": 3.85896962740955e-06, "epoch": 3.0860398035169028, "total_flos": 2945221307568814080, "step": 767100 }, { "loss": 2.8575, "learning_rate": 3.858158502013619e-06, "epoch": 3.0864421030610973, "total_flos": 2945600694913259520, "step": 767200 }, { "loss": 2.88, "learning_rate": 3.857347376617688e-06, "epoch": 3.086844402605292, "total_flos": 2945986960316405760, "step": 767300 }, { "loss": 2.905, "learning_rate": 3.856536251221758e-06, "epoch": 3.0872467021494865, "total_flos": 2946355433058048000, "step": 767400 }, { "loss": 2.9175, "learning_rate": 3.855725125825827e-06, "epoch": 3.087649001693681, "total_flos": 2946721664455464960, "step": 767500 }, { "loss": 2.945, "learning_rate": 3.8549140004298964e-06, "epoch": 3.0880513012378756, "total_flos": 2947086716757104640, "step": 767600 }, { "loss": 2.88, "learning_rate": 3.854102875033966e-06, "epoch": 3.08845360078207, "total_flos": 2947466667093227520, "step": 767700 }, { "loss": 2.8825, "learning_rate": 3.853291749638036e-06, "epoch": 3.088855900326265, "total_flos": 2947843505041397760, "step": 767800 }, { "loss": 2.8875, "learning_rate": 3.852480624242105e-06, "epoch": 3.0892581998704594, "total_flos": 2948226562454231040, "step": 767900 }, { "loss": 2.91, "learning_rate": 3.851669498846174e-06, "epoch": 3.0896604994146544, "total_flos": 2948600718225070080, "step": 768000 }, { "loss": 2.9275, "learning_rate": 3.850858373450243e-06, "epoch": 3.090062798958849, "total_flos": 2948985666440140800, "step": 768100 }, { "loss": 2.8575, "learning_rate": 3.850047248054313e-06, "epoch": 3.0904650985030435, "total_flos": 2949371809684715520, "step": 768200 }, { "loss": 2.92, "learning_rate": 3.8492361226583825e-06, "epoch": 3.090867398047238, "total_flos": 2949749476186675200, "step": 768300 }, { "loss": 2.97, "learning_rate": 3.848424997262452e-06, "epoch": 3.0912696975914327, "total_flos": 2950136872884418560, "step": 768400 }, { "loss": 2.9375, "learning_rate": 3.847613871866521e-06, "epoch": 3.0916719971356272, "total_flos": 2950534111314032640, "step": 768500 }, { "loss": 2.88, "learning_rate": 3.846802746470591e-06, "epoch": 3.092074296679822, "total_flos": 2950928842837309440, "step": 768600 }, { "loss": 2.935, "learning_rate": 3.845991621074661e-06, "epoch": 3.0924765962240164, "total_flos": 2951322809541703680, "step": 768700 }, { "loss": 2.8875, "learning_rate": 3.84518049567873e-06, "epoch": 3.092878895768211, "total_flos": 2951715198807152640, "step": 768800 }, { "loss": 2.8875, "learning_rate": 3.844369370282799e-06, "epoch": 3.0932811953124055, "total_flos": 2952082603989104640, "step": 768900 }, { "loss": 2.88, "learning_rate": 3.8435582448868686e-06, "epoch": 3.0936834948566005, "total_flos": 2952462607437649920, "step": 769000 }, { "loss": 2.94, "learning_rate": 3.842747119490939e-06, "epoch": 3.094085794400795, "total_flos": 2952852234857134080, "step": 769100 }, { "loss": 2.8825, "learning_rate": 3.841935994095008e-06, "epoch": 3.0944880939449897, "total_flos": 2953227240426731520, "step": 769200 }, { "loss": 2.9, "learning_rate": 3.841124868699077e-06, "epoch": 3.0948903934891843, "total_flos": 2953634299343247360, "step": 769300 }, { "loss": 2.8725, "learning_rate": 3.840313743303146e-06, "epoch": 3.095292693033379, "total_flos": 2954012927180052480, "step": 769400 }, { "loss": 2.9, "learning_rate": 3.839502617907216e-06, "epoch": 3.0956949925775734, "total_flos": 2954394943589406720, "step": 769500 }, { "loss": 2.9075, "learning_rate": 3.8386914925112854e-06, "epoch": 3.096097292121768, "total_flos": 2954780332637583360, "step": 769600 }, { "loss": 2.9275, "learning_rate": 3.837880367115355e-06, "epoch": 3.0964995916659626, "total_flos": 2955173311450920960, "step": 769700 }, { "loss": 2.92, "learning_rate": 3.837069241719424e-06, "epoch": 3.096901891210157, "total_flos": 2955567910193141760, "step": 769800 }, { "loss": 2.9275, "learning_rate": 3.836258116323494e-06, "epoch": 3.0973041907543517, "total_flos": 2955955753035233280, "step": 769900 }, { "loss": 2.9075, "learning_rate": 3.835446990927563e-06, "epoch": 3.0977064902985463, "total_flos": 2956335097889740800, "step": 770000 }, { "loss": 2.9, "learning_rate": 3.834635865531632e-06, "epoch": 3.0981087898427413, "total_flos": 2956716402592634880, "step": 770100 }, { "loss": 2.965, "learning_rate": 3.8338247401357015e-06, "epoch": 3.098511089386936, "total_flos": 2957097930367703040, "step": 770200 }, { "loss": 2.9275, "learning_rate": 3.8330136147397715e-06, "epoch": 3.0989133889311304, "total_flos": 2957472394190592000, "step": 770300 }, { "loss": 2.9125, "learning_rate": 3.832202489343841e-06, "epoch": 3.099315688475325, "total_flos": 2957843777492981760, "step": 770400 }, { "loss": 2.9375, "learning_rate": 3.83139136394791e-06, "epoch": 3.0997179880195196, "total_flos": 2958241297418434560, "step": 770500 }, { "loss": 2.88, "learning_rate": 3.830580238551979e-06, "epoch": 3.100120287563714, "total_flos": 2958606057601751040, "step": 770600 }, { "loss": 2.9175, "learning_rate": 3.829769113156049e-06, "epoch": 3.1005225871079087, "total_flos": 2958995249499371520, "step": 770700 }, { "loss": 2.88, "learning_rate": 3.828957987760118e-06, "epoch": 3.1009248866521033, "total_flos": 2959375093610649600, "step": 770800 }, { "loss": 2.89, "learning_rate": 3.8281468623641875e-06, "epoch": 3.101327186196298, "total_flos": 2959764487335475200, "step": 770900 }, { "loss": 2.9125, "learning_rate": 3.827335736968257e-06, "epoch": 3.1017294857404925, "total_flos": 2960149871072409600, "step": 771000 }, { "loss": 2.9525, "learning_rate": 3.826524611572327e-06, "epoch": 3.1021317852846875, "total_flos": 2960520319596165120, "step": 771100 }, { "loss": 2.86, "learning_rate": 3.825713486176396e-06, "epoch": 3.102534084828882, "total_flos": 2960891293932902400, "step": 771200 }, { "loss": 2.9375, "learning_rate": 3.824902360780465e-06, "epoch": 3.1029363843730766, "total_flos": 2961274022048716800, "step": 771300 }, { "loss": 2.8825, "learning_rate": 3.824091235384534e-06, "epoch": 3.103338683917271, "total_flos": 2961673055678208000, "step": 771400 }, { "loss": 2.86, "learning_rate": 3.823280109988604e-06, "epoch": 3.1037409834614658, "total_flos": 2962069115012044800, "step": 771500 }, { "loss": 2.9575, "learning_rate": 3.822468984592674e-06, "epoch": 3.1041432830056603, "total_flos": 2962439754740520960, "step": 771600 }, { "loss": 2.9025, "learning_rate": 3.821657859196743e-06, "epoch": 3.104545582549855, "total_flos": 2962814176073472000, "step": 771700 }, { "loss": 2.925, "learning_rate": 3.820846733800812e-06, "epoch": 3.1049478820940495, "total_flos": 2963197058215311360, "step": 771800 }, { "loss": 2.9575, "learning_rate": 3.820035608404882e-06, "epoch": 3.105350181638244, "total_flos": 2963580837957089280, "step": 771900 }, { "loss": 2.8925, "learning_rate": 3.819224483008951e-06, "epoch": 3.1057524811824386, "total_flos": 2963967570749552640, "step": 772000 }, { "loss": 2.9025, "learning_rate": 3.81841335761302e-06, "epoch": 3.1061547807266336, "total_flos": 2964354749686364160, "step": 772100 }, { "loss": 2.93, "learning_rate": 3.81760223221709e-06, "epoch": 3.106557080270828, "total_flos": 2964725920539064320, "step": 772200 }, { "loss": 2.91, "learning_rate": 3.81679110682116e-06, "epoch": 3.106959379815023, "total_flos": 2965116360578611200, "step": 772300 }, { "loss": 2.885, "learning_rate": 3.815979981425229e-06, "epoch": 3.1073616793592174, "total_flos": 2965491711378954240, "step": 772400 }, { "loss": 2.9125, "learning_rate": 3.815168856029298e-06, "epoch": 3.107763978903412, "total_flos": 2965872644294891520, "step": 772500 }, { "loss": 2.91, "learning_rate": 3.8143577306333672e-06, "epoch": 3.1081662784476065, "total_flos": 2966258182057850880, "step": 772600 }, { "loss": 2.915, "learning_rate": 3.813546605237437e-06, "epoch": 3.108568577991801, "total_flos": 2966633894022666240, "step": 772700 }, { "loss": 2.8875, "learning_rate": 3.812735479841506e-06, "epoch": 3.1089708775359957, "total_flos": 2967008267554437120, "step": 772800 }, { "loss": 2.985, "learning_rate": 3.8119243544455757e-06, "epoch": 3.1093731770801902, "total_flos": 2967394612626216960, "step": 772900 }, { "loss": 2.9775, "learning_rate": 3.811113229049645e-06, "epoch": 3.109775476624385, "total_flos": 2967785158890608640, "step": 773000 }, { "loss": 2.915, "learning_rate": 3.8103021036537145e-06, "epoch": 3.1101777761685794, "total_flos": 2968178472312207360, "step": 773100 }, { "loss": 2.8975, "learning_rate": 3.8094909782577837e-06, "epoch": 3.1105800757127744, "total_flos": 2968563717956843520, "step": 773200 }, { "loss": 2.9375, "learning_rate": 3.8086798528618533e-06, "epoch": 3.110982375256969, "total_flos": 2968948161603901440, "step": 773300 }, { "loss": 2.86, "learning_rate": 3.8078687274659233e-06, "epoch": 3.1113846748011635, "total_flos": 2969346993406187520, "step": 773400 }, { "loss": 2.905, "learning_rate": 3.807057602069992e-06, "epoch": 3.111786974345358, "total_flos": 2969739950974556160, "step": 773500 }, { "loss": 2.9025, "learning_rate": 3.806246476674062e-06, "epoch": 3.1121892738895527, "total_flos": 2970126466006087680, "step": 773600 }, { "loss": 2.9425, "learning_rate": 3.805435351278131e-06, "epoch": 3.1125915734337473, "total_flos": 2970519397018245120, "step": 773700 }, { "loss": 2.9225, "learning_rate": 3.804624225882201e-06, "epoch": 3.112993872977942, "total_flos": 2970892495851878400, "step": 773800 }, { "loss": 2.9225, "learning_rate": 3.8038131004862697e-06, "epoch": 3.1133961725221364, "total_flos": 2971271240536012800, "step": 773900 }, { "loss": 2.9275, "learning_rate": 3.8030019750903398e-06, "epoch": 3.113798472066331, "total_flos": 2971640934863370240, "step": 774000 }, { "loss": 2.8625, "learning_rate": 3.8021908496944086e-06, "epoch": 3.1142007716105256, "total_flos": 2972031258055587840, "step": 774100 }, { "loss": 2.88, "learning_rate": 3.8013797242984786e-06, "epoch": 3.1146030711547206, "total_flos": 2972437451239618560, "step": 774200 }, { "loss": 2.9475, "learning_rate": 3.800568598902548e-06, "epoch": 3.115005370698915, "total_flos": 2972818017679841280, "step": 774300 }, { "loss": 2.8875, "learning_rate": 3.7997574735066174e-06, "epoch": 3.1154076702431097, "total_flos": 2973207990330071040, "step": 774400 }, { "loss": 2.8925, "learning_rate": 3.7989463481106866e-06, "epoch": 3.1158099697873043, "total_flos": 2973600459264153600, "step": 774500 }, { "loss": 2.87, "learning_rate": 3.7981352227147562e-06, "epoch": 3.116212269331499, "total_flos": 2973992747616000000, "step": 774600 }, { "loss": 2.9625, "learning_rate": 3.7973240973188254e-06, "epoch": 3.1166145688756934, "total_flos": 2974387378225674240, "step": 774700 }, { "loss": 2.8425, "learning_rate": 3.796512971922895e-06, "epoch": 3.117016868419888, "total_flos": 2974763430109992960, "step": 774800 }, { "loss": 2.9275, "learning_rate": 3.7957018465269642e-06, "epoch": 3.1174191679640826, "total_flos": 2975156828511467520, "step": 774900 }, { "loss": 2.875, "learning_rate": 3.794890721131034e-06, "epoch": 3.117821467508277, "total_flos": 2975533661148395520, "step": 775000 }, { "loss": 2.9025, "learning_rate": 3.794079595735103e-06, "epoch": 3.1182237670524717, "total_flos": 2975912915711784960, "step": 775100 }, { "loss": 2.9375, "learning_rate": 3.7932684703391727e-06, "epoch": 3.1186260665966667, "total_flos": 2976307068309657600, "step": 775200 }, { "loss": 2.885, "learning_rate": 3.792457344943242e-06, "epoch": 3.1190283661408613, "total_flos": 2976687841888327680, "step": 775300 }, { "loss": 2.8725, "learning_rate": 3.7916462195473115e-06, "epoch": 3.119430665685056, "total_flos": 2977082594656573440, "step": 775400 }, { "loss": 2.9075, "learning_rate": 3.7908350941513807e-06, "epoch": 3.1198329652292505, "total_flos": 2977451104576911360, "step": 775500 }, { "loss": 2.9175, "learning_rate": 3.7900239687554503e-06, "epoch": 3.120235264773445, "total_flos": 2977836833544591360, "step": 775600 }, { "loss": 2.91, "learning_rate": 3.7892128433595195e-06, "epoch": 3.1206375643176396, "total_flos": 2978235330738616320, "step": 775700 }, { "loss": 2.9125, "learning_rate": 3.788401717963589e-06, "epoch": 3.121039863861834, "total_flos": 2978613910774241280, "step": 775800 }, { "loss": 2.895, "learning_rate": 3.7875905925676583e-06, "epoch": 3.1214421634060288, "total_flos": 2979002513123973120, "step": 775900 }, { "loss": 2.955, "learning_rate": 3.786779467171728e-06, "epoch": 3.1218444629502233, "total_flos": 2979371973756672000, "step": 776000 }, { "loss": 2.865, "learning_rate": 3.785968341775797e-06, "epoch": 3.122246762494418, "total_flos": 2979756119974164480, "step": 776100 }, { "loss": 2.9025, "learning_rate": 3.7851572163798668e-06, "epoch": 3.122649062038613, "total_flos": 2980133823654819840, "step": 776200 }, { "loss": 2.905, "learning_rate": 3.784346090983936e-06, "epoch": 3.1230513615828075, "total_flos": 2980522319779706880, "step": 776300 }, { "loss": 2.9175, "learning_rate": 3.7835349655880056e-06, "epoch": 3.123453661127002, "total_flos": 2980896996052285440, "step": 776400 }, { "loss": 2.9875, "learning_rate": 3.7827238401920748e-06, "epoch": 3.1238559606711966, "total_flos": 2981270726923745280, "step": 776500 }, { "loss": 2.86, "learning_rate": 3.7819127147961444e-06, "epoch": 3.124258260215391, "total_flos": 2981650852530862080, "step": 776600 }, { "loss": 2.8775, "learning_rate": 3.7811015894002136e-06, "epoch": 3.124660559759586, "total_flos": 2982037558767114240, "step": 776700 }, { "loss": 2.94, "learning_rate": 3.780290464004283e-06, "epoch": 3.1250628593037804, "total_flos": 2982421784653240320, "step": 776800 }, { "loss": 2.935, "learning_rate": 3.7794793386083524e-06, "epoch": 3.125465158847975, "total_flos": 2982814062382602240, "step": 776900 }, { "loss": 2.8975, "learning_rate": 3.778668213212422e-06, "epoch": 3.1258674583921695, "total_flos": 2983207269579356160, "step": 777000 }, { "loss": 2.905, "learning_rate": 3.7778570878164912e-06, "epoch": 3.126269757936364, "total_flos": 2983588887645542400, "step": 777100 }, { "loss": 2.91, "learning_rate": 3.777045962420561e-06, "epoch": 3.126672057480559, "total_flos": 2983996265236592640, "step": 777200 }, { "loss": 2.935, "learning_rate": 3.77623483702463e-06, "epoch": 3.1270743570247537, "total_flos": 2984390954269931520, "step": 777300 }, { "loss": 2.925, "learning_rate": 3.7754237116286996e-06, "epoch": 3.1274766565689482, "total_flos": 2984781325263329280, "step": 777400 }, { "loss": 2.9125, "learning_rate": 3.774612586232769e-06, "epoch": 3.127878956113143, "total_flos": 2985180566031267840, "step": 777500 }, { "loss": 2.9125, "learning_rate": 3.7738014608368385e-06, "epoch": 3.1282812556573374, "total_flos": 2985546367218063360, "step": 777600 }, { "loss": 2.86, "learning_rate": 3.7729903354409077e-06, "epoch": 3.128683555201532, "total_flos": 2985937216223262720, "step": 777700 }, { "loss": 2.8525, "learning_rate": 3.7721792100449773e-06, "epoch": 3.1290858547457265, "total_flos": 2986310920538511360, "step": 777800 }, { "loss": 2.92, "learning_rate": 3.7713680846490465e-06, "epoch": 3.129488154289921, "total_flos": 2986695178292090880, "step": 777900 }, { "loss": 2.9225, "learning_rate": 3.770556959253116e-06, "epoch": 3.1298904538341157, "total_flos": 2987079117371136000, "step": 778000 }, { "loss": 2.945, "learning_rate": 3.7697458338571857e-06, "epoch": 3.1302927533783103, "total_flos": 2987477349003048960, "step": 778100 }, { "loss": 2.925, "learning_rate": 3.768934708461255e-06, "epoch": 3.130695052922505, "total_flos": 2987870078188001280, "step": 778200 }, { "loss": 2.9125, "learning_rate": 3.7681235830653245e-06, "epoch": 3.1310973524667, "total_flos": 2988244085244057600, "step": 778300 }, { "loss": 2.915, "learning_rate": 3.7673124576693937e-06, "epoch": 3.1314996520108944, "total_flos": 2988635380393605120, "step": 778400 }, { "loss": 2.905, "learning_rate": 3.7665013322734633e-06, "epoch": 3.131901951555089, "total_flos": 2989015686582958080, "step": 778500 }, { "loss": 2.915, "learning_rate": 3.7656902068775325e-06, "epoch": 3.1323042510992836, "total_flos": 2989384642647644160, "step": 778600 }, { "loss": 2.895, "learning_rate": 3.764879081481602e-06, "epoch": 3.132706550643478, "total_flos": 2989768395833210880, "step": 778700 }, { "loss": 2.8675, "learning_rate": 3.7640679560856714e-06, "epoch": 3.1331088501876727, "total_flos": 2990156791044495360, "step": 778800 }, { "loss": 2.9025, "learning_rate": 3.763256830689741e-06, "epoch": 3.1335111497318673, "total_flos": 2990542403164846080, "step": 778900 }, { "loss": 2.91, "learning_rate": 3.76244570529381e-06, "epoch": 3.133913449276062, "total_flos": 2990936608875141120, "step": 779000 }, { "loss": 2.9075, "learning_rate": 3.7616345798978798e-06, "epoch": 3.1343157488202564, "total_flos": 2991332928459847680, "step": 779100 }, { "loss": 2.8475, "learning_rate": 3.760823454501949e-06, "epoch": 3.134718048364451, "total_flos": 2991711104841062400, "step": 779200 }, { "loss": 2.94, "learning_rate": 3.7600123291060186e-06, "epoch": 3.135120347908646, "total_flos": 2992092303319111680, "step": 779300 }, { "loss": 2.915, "learning_rate": 3.759201203710088e-06, "epoch": 3.1355226474528406, "total_flos": 2992471180784302080, "step": 779400 }, { "loss": 2.8925, "learning_rate": 3.7583900783141574e-06, "epoch": 3.135924946997035, "total_flos": 2992838182311843840, "step": 779500 }, { "loss": 2.895, "learning_rate": 3.7575789529182266e-06, "epoch": 3.1363272465412297, "total_flos": 2993232749186611200, "step": 779600 }, { "loss": 2.885, "learning_rate": 3.7567678275222962e-06, "epoch": 3.1367295460854243, "total_flos": 2993620029037025280, "step": 779700 }, { "loss": 2.8925, "learning_rate": 3.7559567021263654e-06, "epoch": 3.137131845629619, "total_flos": 2993999857214576640, "step": 779800 }, { "loss": 2.8725, "learning_rate": 3.755145576730435e-06, "epoch": 3.1375341451738135, "total_flos": 2994403538181027840, "step": 779900 }, { "loss": 2.8675, "learning_rate": 3.7543344513345042e-06, "epoch": 3.137936444718008, "total_flos": 2994788937851688960, "step": 780000 }, { "loss": 2.9225, "learning_rate": 3.753523325938574e-06, "epoch": 3.1383387442622026, "total_flos": 2995173094691665920, "step": 780100 }, { "loss": 2.905, "learning_rate": 3.752712200542643e-06, "epoch": 3.138741043806397, "total_flos": 2995563508175001600, "step": 780200 }, { "loss": 2.9025, "learning_rate": 3.7519010751467127e-06, "epoch": 3.139143343350592, "total_flos": 2995949901047961600, "step": 780300 }, { "loss": 2.9625, "learning_rate": 3.751089949750782e-06, "epoch": 3.1395456428947868, "total_flos": 2996334997977815040, "step": 780400 }, { "loss": 2.8825, "learning_rate": 3.7502788243548515e-06, "epoch": 3.1399479424389813, "total_flos": 2996732624128112640, "step": 780500 }, { "loss": 2.865, "learning_rate": 3.7494676989589207e-06, "epoch": 3.140350241983176, "total_flos": 2997135837705246720, "step": 780600 }, { "loss": 2.945, "learning_rate": 3.7486565735629903e-06, "epoch": 3.1407525415273705, "total_flos": 2997512734077081600, "step": 780700 }, { "loss": 2.865, "learning_rate": 3.7478454481670595e-06, "epoch": 3.141154841071565, "total_flos": 2997917615384279040, "step": 780800 }, { "loss": 2.8875, "learning_rate": 3.747034322771129e-06, "epoch": 3.1415571406157596, "total_flos": 2998298027798476800, "step": 780900 }, { "loss": 2.9175, "learning_rate": 3.7462231973751983e-06, "epoch": 3.141959440159954, "total_flos": 2998674233708820480, "step": 781000 }, { "loss": 2.8625, "learning_rate": 3.745412071979268e-06, "epoch": 3.142361739704149, "total_flos": 2999070691385825280, "step": 781100 }, { "loss": 2.9025, "learning_rate": 3.744600946583337e-06, "epoch": 3.1427640392483434, "total_flos": 2999450625788221440, "step": 781200 }, { "loss": 2.905, "learning_rate": 3.7437898211874068e-06, "epoch": 3.143166338792538, "total_flos": 2999833433572669440, "step": 781300 }, { "loss": 2.8725, "learning_rate": 3.742978695791476e-06, "epoch": 3.143568638336733, "total_flos": 3000221016163891200, "step": 781400 }, { "loss": 2.885, "learning_rate": 3.7421675703955456e-06, "epoch": 3.1439709378809275, "total_flos": 3000606033425111040, "step": 781500 }, { "loss": 2.9125, "learning_rate": 3.7413564449996148e-06, "epoch": 3.144373237425122, "total_flos": 3001000281625344000, "step": 781600 }, { "loss": 2.8925, "learning_rate": 3.7405453196036844e-06, "epoch": 3.1447755369693167, "total_flos": 3001380253206435840, "step": 781700 }, { "loss": 2.9175, "learning_rate": 3.7397341942077536e-06, "epoch": 3.1451778365135112, "total_flos": 3001776036355676160, "step": 781800 }, { "loss": 2.8925, "learning_rate": 3.738923068811823e-06, "epoch": 3.145580136057706, "total_flos": 3002166391415347200, "step": 781900 }, { "loss": 2.92, "learning_rate": 3.7381119434158924e-06, "epoch": 3.1459824356019004, "total_flos": 3002560660860549120, "step": 782000 }, { "loss": 2.905, "learning_rate": 3.737300818019962e-06, "epoch": 3.146384735146095, "total_flos": 3002954091129477120, "step": 782100 }, { "loss": 2.89, "learning_rate": 3.7364896926240312e-06, "epoch": 3.1467870346902895, "total_flos": 3003341339112437760, "step": 782200 }, { "loss": 2.8725, "learning_rate": 3.735678567228101e-06, "epoch": 3.1471893342344845, "total_flos": 3003712138178181120, "step": 782300 }, { "loss": 2.8825, "learning_rate": 3.73486744183217e-06, "epoch": 3.147591633778679, "total_flos": 3004098318601451520, "step": 782400 }, { "loss": 2.91, "learning_rate": 3.7340563164362396e-06, "epoch": 3.1479939333228737, "total_flos": 3004487871663544320, "step": 782500 }, { "loss": 2.8725, "learning_rate": 3.733245191040309e-06, "epoch": 3.1483962328670683, "total_flos": 3004872256886937600, "step": 782600 }, { "loss": 2.935, "learning_rate": 3.7324340656443785e-06, "epoch": 3.148798532411263, "total_flos": 3005254703506913280, "step": 782700 }, { "loss": 2.9225, "learning_rate": 3.731622940248448e-06, "epoch": 3.1492008319554574, "total_flos": 3005637962746951680, "step": 782800 }, { "loss": 2.8725, "learning_rate": 3.7308118148525173e-06, "epoch": 3.149603131499652, "total_flos": 3006020674929039360, "step": 782900 }, { "loss": 2.9, "learning_rate": 3.7300006894565873e-06, "epoch": 3.1500054310438466, "total_flos": 3006396631210997760, "step": 783000 }, { "loss": 2.9075, "learning_rate": 3.729189564060656e-06, "epoch": 3.150407730588041, "total_flos": 3006786412656506880, "step": 783100 }, { "loss": 2.8625, "learning_rate": 3.728378438664726e-06, "epoch": 3.1508100301322357, "total_flos": 3007152447537960960, "step": 783200 }, { "loss": 2.9675, "learning_rate": 3.727567313268795e-06, "epoch": 3.1512123296764303, "total_flos": 3007544783690987520, "step": 783300 }, { "loss": 2.9125, "learning_rate": 3.726756187872865e-06, "epoch": 3.1516146292206253, "total_flos": 3007941437883955200, "step": 783400 }, { "loss": 2.905, "learning_rate": 3.7259450624769337e-06, "epoch": 3.15201692876482, "total_flos": 3008322062747842560, "step": 783500 }, { "loss": 2.8725, "learning_rate": 3.7251339370810038e-06, "epoch": 3.1524192283090144, "total_flos": 3008696829311539200, "step": 783600 }, { "loss": 2.88, "learning_rate": 3.7243228116850725e-06, "epoch": 3.152821527853209, "total_flos": 3009078787297228800, "step": 783700 }, { "loss": 2.8975, "learning_rate": 3.7235116862891426e-06, "epoch": 3.1532238273974036, "total_flos": 3009451779906017280, "step": 783800 }, { "loss": 2.905, "learning_rate": 3.7227005608932114e-06, "epoch": 3.153626126941598, "total_flos": 3009826732363192320, "step": 783900 }, { "loss": 2.9225, "learning_rate": 3.7218894354972814e-06, "epoch": 3.1540284264857927, "total_flos": 3010201270543472640, "step": 784000 }, { "loss": 2.91, "learning_rate": 3.72107831010135e-06, "epoch": 3.1544307260299873, "total_flos": 3010573912610273280, "step": 784100 }, { "loss": 2.92, "learning_rate": 3.72026718470542e-06, "epoch": 3.154833025574182, "total_flos": 3010948387055646720, "step": 784200 }, { "loss": 2.945, "learning_rate": 3.719456059309489e-06, "epoch": 3.1552353251183765, "total_flos": 3011319042717849600, "step": 784300 }, { "loss": 2.93, "learning_rate": 3.718644933913559e-06, "epoch": 3.155637624662571, "total_flos": 3011693017906452480, "step": 784400 }, { "loss": 2.925, "learning_rate": 3.717833808517628e-06, "epoch": 3.156039924206766, "total_flos": 3012073918954936320, "step": 784500 }, { "loss": 2.9525, "learning_rate": 3.717022683121698e-06, "epoch": 3.1564422237509606, "total_flos": 3012460423363983360, "step": 784600 }, { "loss": 2.8975, "learning_rate": 3.7162115577257666e-06, "epoch": 3.156844523295155, "total_flos": 3012856063109683200, "step": 784700 }, { "loss": 2.97, "learning_rate": 3.7154004323298367e-06, "epoch": 3.1572468228393498, "total_flos": 3013237691798353920, "step": 784800 }, { "loss": 2.9425, "learning_rate": 3.7145893069339054e-06, "epoch": 3.1576491223835443, "total_flos": 3013609037922048000, "step": 784900 }, { "loss": 2.9275, "learning_rate": 3.7137781815379755e-06, "epoch": 3.158051421927739, "total_flos": 3013991797905315840, "step": 785000 }, { "loss": 2.8675, "learning_rate": 3.7129670561420442e-06, "epoch": 3.1584537214719335, "total_flos": 3014374637557217280, "step": 785100 }, { "loss": 2.8875, "learning_rate": 3.7121559307461143e-06, "epoch": 3.158856021016128, "total_flos": 3014747667344701440, "step": 785200 }, { "loss": 2.915, "learning_rate": 3.711344805350183e-06, "epoch": 3.1592583205603226, "total_flos": 3015125317912934400, "step": 785300 }, { "loss": 2.9, "learning_rate": 3.710533679954253e-06, "epoch": 3.1596606201045176, "total_flos": 3015525785577830400, "step": 785400 }, { "loss": 2.9075, "learning_rate": 3.709722554558322e-06, "epoch": 3.160062919648712, "total_flos": 3015927713834342400, "step": 785500 }, { "loss": 2.8625, "learning_rate": 3.708911429162392e-06, "epoch": 3.160465219192907, "total_flos": 3016335123292846080, "step": 785600 }, { "loss": 2.8625, "learning_rate": 3.708100303766461e-06, "epoch": 3.1608675187371014, "total_flos": 3016717798296238080, "step": 785700 }, { "loss": 2.88, "learning_rate": 3.7072891783705307e-06, "epoch": 3.161269818281296, "total_flos": 3017112689156782080, "step": 785800 }, { "loss": 2.8825, "learning_rate": 3.7064780529746e-06, "epoch": 3.1616721178254905, "total_flos": 3017492347374581760, "step": 785900 }, { "loss": 2.915, "learning_rate": 3.7056669275786695e-06, "epoch": 3.162074417369685, "total_flos": 3017877757667727360, "step": 786000 }, { "loss": 2.8725, "learning_rate": 3.7048558021827387e-06, "epoch": 3.1624767169138797, "total_flos": 3018259327932733440, "step": 786100 }, { "loss": 2.9, "learning_rate": 3.7040446767868084e-06, "epoch": 3.1628790164580742, "total_flos": 3018646878656501760, "step": 786200 }, { "loss": 2.8675, "learning_rate": 3.7032335513908776e-06, "epoch": 3.163281316002269, "total_flos": 3019037956045117440, "step": 786300 }, { "loss": 2.9125, "learning_rate": 3.702422425994947e-06, "epoch": 3.1636836155464634, "total_flos": 3019415739394406400, "step": 786400 }, { "loss": 2.905, "learning_rate": 3.7016113005990164e-06, "epoch": 3.1640859150906584, "total_flos": 3019801245289912320, "step": 786500 }, { "loss": 2.8525, "learning_rate": 3.700800175203086e-06, "epoch": 3.164488214634853, "total_flos": 3020185067521628160, "step": 786600 }, { "loss": 2.8825, "learning_rate": 3.699989049807155e-06, "epoch": 3.1648905141790475, "total_flos": 3020581509264906240, "step": 786700 }, { "loss": 2.9025, "learning_rate": 3.699177924411225e-06, "epoch": 3.165292813723242, "total_flos": 3020978391841290240, "step": 786800 }, { "loss": 2.9225, "learning_rate": 3.698366799015294e-06, "epoch": 3.1656951132674367, "total_flos": 3021354948293621760, "step": 786900 }, { "loss": 2.8925, "learning_rate": 3.6975556736193636e-06, "epoch": 3.1660974128116313, "total_flos": 3021746902037207040, "step": 787000 }, { "loss": 2.9, "learning_rate": 3.696744548223433e-06, "epoch": 3.166499712355826, "total_flos": 3022107822192384000, "step": 787100 }, { "loss": 2.93, "learning_rate": 3.6959334228275024e-06, "epoch": 3.1669020119000204, "total_flos": 3022485701144033280, "step": 787200 }, { "loss": 2.875, "learning_rate": 3.6951222974315716e-06, "epoch": 3.167304311444215, "total_flos": 3022870091678668800, "step": 787300 }, { "loss": 2.9275, "learning_rate": 3.6943111720356413e-06, "epoch": 3.1677066109884096, "total_flos": 3023251592897525760, "step": 787400 }, { "loss": 2.9075, "learning_rate": 3.693500046639711e-06, "epoch": 3.1681089105326046, "total_flos": 3023647014882293760, "step": 787500 }, { "loss": 2.9025, "learning_rate": 3.69268892124378e-06, "epoch": 3.168511210076799, "total_flos": 3024032998789601280, "step": 787600 }, { "loss": 2.8375, "learning_rate": 3.6918777958478497e-06, "epoch": 3.1689135096209937, "total_flos": 3024414983331502080, "step": 787700 }, { "loss": 2.9125, "learning_rate": 3.691066670451919e-06, "epoch": 3.1693158091651883, "total_flos": 3024799357932410880, "step": 787800 }, { "loss": 2.9425, "learning_rate": 3.6902555450559885e-06, "epoch": 3.169718108709383, "total_flos": 3025174942427412480, "step": 787900 }, { "loss": 2.8875, "learning_rate": 3.6894444196600577e-06, "epoch": 3.1701204082535774, "total_flos": 3025551854732974080, "step": 788000 }, { "loss": 2.9225, "learning_rate": 3.6886332942641273e-06, "epoch": 3.170522707797772, "total_flos": 3025939856912332800, "step": 788100 }, { "loss": 2.9525, "learning_rate": 3.6878221688681965e-06, "epoch": 3.1709250073419666, "total_flos": 3026319940029511680, "step": 788200 }, { "loss": 2.8375, "learning_rate": 3.687011043472266e-06, "epoch": 3.171327306886161, "total_flos": 3026700580827125760, "step": 788300 }, { "loss": 2.905, "learning_rate": 3.6861999180763353e-06, "epoch": 3.1717296064303557, "total_flos": 3027080069085173760, "step": 788400 }, { "loss": 2.8975, "learning_rate": 3.685388792680405e-06, "epoch": 3.1721319059745507, "total_flos": 3027458282645084160, "step": 788500 }, { "loss": 2.8475, "learning_rate": 3.684577667284474e-06, "epoch": 3.1725342055187453, "total_flos": 3027843692938229760, "step": 788600 }, { "loss": 2.925, "learning_rate": 3.6837665418885438e-06, "epoch": 3.17293650506294, "total_flos": 3028228051605411840, "step": 788700 }, { "loss": 2.9225, "learning_rate": 3.682955416492613e-06, "epoch": 3.1733388046071345, "total_flos": 3028610498225387520, "step": 788800 }, { "loss": 2.9325, "learning_rate": 3.6821442910966826e-06, "epoch": 3.173741104151329, "total_flos": 3028993789332879360, "step": 788900 }, { "loss": 2.8975, "learning_rate": 3.6813331657007518e-06, "epoch": 3.1741434036955236, "total_flos": 3029368938306017280, "step": 789000 }, { "loss": 2.97, "learning_rate": 3.6805220403048214e-06, "epoch": 3.174545703239718, "total_flos": 3029740178204866560, "step": 789100 }, { "loss": 2.87, "learning_rate": 3.6797109149088906e-06, "epoch": 3.1749480027839128, "total_flos": 3030105857233090560, "step": 789200 }, { "loss": 2.8775, "learning_rate": 3.67889978951296e-06, "epoch": 3.1753503023281073, "total_flos": 3030481218655918080, "step": 789300 }, { "loss": 2.92, "learning_rate": 3.6780886641170294e-06, "epoch": 3.175752601872302, "total_flos": 3030868801247139840, "step": 789400 }, { "loss": 2.8575, "learning_rate": 3.677277538721099e-06, "epoch": 3.1761549014164965, "total_flos": 3031258545513953280, "step": 789500 }, { "loss": 2.9175, "learning_rate": 3.6764664133251682e-06, "epoch": 3.1765572009606915, "total_flos": 3031647206287349760, "step": 789600 }, { "loss": 2.885, "learning_rate": 3.675655287929238e-06, "epoch": 3.176959500504886, "total_flos": 3032029334232791040, "step": 789700 }, { "loss": 2.885, "learning_rate": 3.674844162533307e-06, "epoch": 3.1773618000490806, "total_flos": 3032424044511098880, "step": 789800 }, { "loss": 2.915, "learning_rate": 3.6740330371373767e-06, "epoch": 3.177764099593275, "total_flos": 3032803899244861440, "step": 789900 }, { "loss": 2.935, "learning_rate": 3.673221911741446e-06, "epoch": 3.17816639913747, "total_flos": 3033184141699307520, "step": 790000 }, { "loss": 2.885, "learning_rate": 3.6724107863455155e-06, "epoch": 3.1785686986816644, "total_flos": 3033570858558044160, "step": 790100 }, { "loss": 2.905, "learning_rate": 3.6715996609495847e-06, "epoch": 3.178970998225859, "total_flos": 3033956842465351680, "step": 790200 }, { "loss": 2.885, "learning_rate": 3.6707885355536543e-06, "epoch": 3.1793732977700535, "total_flos": 3034347872052787200, "step": 790300 }, { "loss": 2.87, "learning_rate": 3.6699774101577235e-06, "epoch": 3.179775597314248, "total_flos": 3034743230302648320, "step": 790400 }, { "loss": 2.8725, "learning_rate": 3.669166284761793e-06, "epoch": 3.1801778968584427, "total_flos": 3035131609580206080, "step": 790500 }, { "loss": 2.9075, "learning_rate": 3.6683551593658623e-06, "epoch": 3.1805801964026377, "total_flos": 3035508909606451200, "step": 790600 }, { "loss": 2.9375, "learning_rate": 3.667544033969932e-06, "epoch": 3.1809824959468322, "total_flos": 3035905234502400000, "step": 790700 }, { "loss": 2.915, "learning_rate": 3.666732908574001e-06, "epoch": 3.181384795491027, "total_flos": 3036292408127969280, "step": 790800 }, { "loss": 2.9125, "learning_rate": 3.6659217831780707e-06, "epoch": 3.1817870950352214, "total_flos": 3036667456187504640, "step": 790900 }, { "loss": 2.9, "learning_rate": 3.66511065778214e-06, "epoch": 3.182189394579416, "total_flos": 3037053875616675840, "step": 791000 }, { "loss": 2.9575, "learning_rate": 3.6642995323862095e-06, "epoch": 3.1825916941236105, "total_flos": 3037446588867901440, "step": 791100 }, { "loss": 2.9275, "learning_rate": 3.6634884069902787e-06, "epoch": 3.182993993667805, "total_flos": 3037833842162104320, "step": 791200 }, { "loss": 2.9175, "learning_rate": 3.6626772815943484e-06, "epoch": 3.1833962932119997, "total_flos": 3038214217397606400, "step": 791300 }, { "loss": 2.905, "learning_rate": 3.6618661561984176e-06, "epoch": 3.1837985927561943, "total_flos": 3038607265257093120, "step": 791400 }, { "loss": 2.89, "learning_rate": 3.661055030802487e-06, "epoch": 3.184200892300389, "total_flos": 3038992197538437120, "step": 791500 }, { "loss": 2.855, "learning_rate": 3.6602439054065564e-06, "epoch": 3.184603191844584, "total_flos": 3039387826661652480, "step": 791600 }, { "loss": 2.925, "learning_rate": 3.659432780010626e-06, "epoch": 3.1850054913887784, "total_flos": 3039758726640998400, "step": 791700 }, { "loss": 2.8975, "learning_rate": 3.658621654614695e-06, "epoch": 3.185407790932973, "total_flos": 3040138597308487680, "step": 791800 }, { "loss": 2.89, "learning_rate": 3.657810529218765e-06, "epoch": 3.1858100904771676, "total_flos": 3040514983801067520, "step": 791900 }, { "loss": 2.89, "learning_rate": 3.656999403822834e-06, "epoch": 3.186212390021362, "total_flos": 3040903187807631360, "step": 792000 }, { "loss": 2.865, "learning_rate": 3.6561882784269036e-06, "epoch": 3.1866146895655567, "total_flos": 3041285220150712320, "step": 792100 }, { "loss": 2.895, "learning_rate": 3.655377153030973e-06, "epoch": 3.1870169891097513, "total_flos": 3041663200015964160, "step": 792200 }, { "loss": 2.91, "learning_rate": 3.6545660276350424e-06, "epoch": 3.187419288653946, "total_flos": 3042040229168855040, "step": 792300 }, { "loss": 2.8925, "learning_rate": 3.653754902239112e-06, "epoch": 3.1878215881981404, "total_flos": 3042425835977963520, "step": 792400 }, { "loss": 2.8975, "learning_rate": 3.6529437768431813e-06, "epoch": 3.188223887742335, "total_flos": 3042833946520442880, "step": 792500 }, { "loss": 2.89, "learning_rate": 3.652132651447251e-06, "epoch": 3.1886261872865296, "total_flos": 3043228322190489600, "step": 792600 }, { "loss": 2.9725, "learning_rate": 3.65132152605132e-06, "epoch": 3.1890284868307246, "total_flos": 3043619442069043200, "step": 792700 }, { "loss": 2.885, "learning_rate": 3.6505104006553897e-06, "epoch": 3.189430786374919, "total_flos": 3044016462737725440, "step": 792800 }, { "loss": 2.9375, "learning_rate": 3.649699275259459e-06, "epoch": 3.1898330859191137, "total_flos": 3044398325121054720, "step": 792900 }, { "loss": 2.905, "learning_rate": 3.6488881498635285e-06, "epoch": 3.1902353854633083, "total_flos": 3044790533804267520, "step": 793000 }, { "loss": 2.9225, "learning_rate": 3.6480770244675977e-06, "epoch": 3.190637685007503, "total_flos": 3045174122341324800, "step": 793100 }, { "loss": 2.9, "learning_rate": 3.6472658990716673e-06, "epoch": 3.1910399845516975, "total_flos": 3045560201850992640, "step": 793200 }, { "loss": 2.9025, "learning_rate": 3.6464547736757365e-06, "epoch": 3.191442284095892, "total_flos": 3045933258194688000, "step": 793300 }, { "loss": 2.9, "learning_rate": 3.645643648279806e-06, "epoch": 3.1918445836400866, "total_flos": 3046313341311866880, "step": 793400 }, { "loss": 2.9125, "learning_rate": 3.6448325228838753e-06, "epoch": 3.192246883184281, "total_flos": 3046696504949544960, "step": 793500 }, { "loss": 2.8275, "learning_rate": 3.644021397487945e-06, "epoch": 3.192649182728476, "total_flos": 3047083067782256640, "step": 793600 }, { "loss": 2.87, "learning_rate": 3.643210272092014e-06, "epoch": 3.1930514822726708, "total_flos": 3047469747462297600, "step": 793700 }, { "loss": 2.9225, "learning_rate": 3.6423991466960838e-06, "epoch": 3.1934537818168653, "total_flos": 3047840668686612480, "step": 793800 }, { "loss": 2.8575, "learning_rate": 3.641588021300153e-06, "epoch": 3.19385608136106, "total_flos": 3048222902856898560, "step": 793900 }, { "loss": 2.8825, "learning_rate": 3.6407768959042226e-06, "epoch": 3.1942583809052545, "total_flos": 3048600856165939200, "step": 794000 }, { "loss": 2.9075, "learning_rate": 3.6399657705082918e-06, "epoch": 3.194660680449449, "total_flos": 3048996671182632960, "step": 794100 }, { "loss": 2.9025, "learning_rate": 3.6391546451123614e-06, "epoch": 3.1950629799936436, "total_flos": 3049401371907594240, "step": 794200 }, { "loss": 2.905, "learning_rate": 3.6383435197164306e-06, "epoch": 3.195465279537838, "total_flos": 3049786548506081280, "step": 794300 }, { "loss": 2.835, "learning_rate": 3.6375323943205006e-06, "epoch": 3.195867579082033, "total_flos": 3050165011694376960, "step": 794400 }, { "loss": 2.925, "learning_rate": 3.6367212689245694e-06, "epoch": 3.1962698786262274, "total_flos": 3050559950356101120, "step": 794500 }, { "loss": 2.9, "learning_rate": 3.6359101435286395e-06, "epoch": 3.196672178170422, "total_flos": 3050954963375216640, "step": 794600 }, { "loss": 2.875, "learning_rate": 3.6350990181327082e-06, "epoch": 3.197074477714617, "total_flos": 3051333484987176960, "step": 794700 }, { "loss": 2.8725, "learning_rate": 3.6342878927367783e-06, "epoch": 3.1974767772588115, "total_flos": 3051727738498652160, "step": 794800 }, { "loss": 2.8575, "learning_rate": 3.633476767340847e-06, "epoch": 3.197879076803006, "total_flos": 3052121503375841280, "step": 794900 }, { "loss": 2.9375, "learning_rate": 3.632665641944917e-06, "epoch": 3.1982813763472007, "total_flos": 3052496195582146560, "step": 795000 }, { "loss": 2.885, "learning_rate": 3.631854516548986e-06, "epoch": 3.1986836758913952, "total_flos": 3052907514114938880, "step": 795100 }, { "loss": 2.91, "learning_rate": 3.631043391153056e-06, "epoch": 3.19908597543559, "total_flos": 3053294900190197760, "step": 795200 }, { "loss": 2.9625, "learning_rate": 3.6302322657571247e-06, "epoch": 3.1994882749797844, "total_flos": 3053668546081781760, "step": 795300 }, { "loss": 2.855, "learning_rate": 3.6294211403611947e-06, "epoch": 3.199890574523979, "total_flos": 3054040455197153280, "step": 795400 }, { "loss": 2.865, "learning_rate": 3.6286100149652635e-06, "epoch": 3.2002928740681735, "total_flos": 3054413277846190080, "step": 795500 }, { "loss": 2.9275, "learning_rate": 3.6277988895693335e-06, "epoch": 3.200695173612368, "total_flos": 3054800440849274880, "step": 795600 }, { "loss": 2.9125, "learning_rate": 3.6269877641734023e-06, "epoch": 3.2010974731565627, "total_flos": 3055182393523722240, "step": 795700 }, { "loss": 2.8725, "learning_rate": 3.6261766387774723e-06, "epoch": 3.2014997727007577, "total_flos": 3055574304777369600, "step": 795800 }, { "loss": 2.915, "learning_rate": 3.625365513381541e-06, "epoch": 3.2019020722449523, "total_flos": 3055960405532006400, "step": 795900 }, { "loss": 2.9025, "learning_rate": 3.624554387985611e-06, "epoch": 3.202304371789147, "total_flos": 3056330896545699840, "step": 796000 }, { "loss": 2.88, "learning_rate": 3.62374326258968e-06, "epoch": 3.2027066713333414, "total_flos": 3056703692638525440, "step": 796100 }, { "loss": 2.88, "learning_rate": 3.62293213719375e-06, "epoch": 3.203108970877536, "total_flos": 3057086431376824320, "step": 796200 }, { "loss": 2.9, "learning_rate": 3.6221210117978187e-06, "epoch": 3.2035112704217306, "total_flos": 3057466992505804800, "step": 796300 }, { "loss": 2.92, "learning_rate": 3.6213098864018888e-06, "epoch": 3.203913569965925, "total_flos": 3057852588692428800, "step": 796400 }, { "loss": 2.895, "learning_rate": 3.6204987610059576e-06, "epoch": 3.2043158695101197, "total_flos": 3058246204854835200, "step": 796500 }, { "loss": 2.8975, "learning_rate": 3.6196876356100276e-06, "epoch": 3.2047181690543143, "total_flos": 3058639964420782080, "step": 796600 }, { "loss": 2.93, "learning_rate": 3.6188765102140964e-06, "epoch": 3.2051204685985093, "total_flos": 3059016117218703360, "step": 796700 }, { "loss": 2.8625, "learning_rate": 3.6180653848181664e-06, "epoch": 3.205522768142704, "total_flos": 3059410036121917440, "step": 796800 }, { "loss": 2.85, "learning_rate": 3.6172542594222356e-06, "epoch": 3.2059250676868984, "total_flos": 3059803965647616000, "step": 796900 }, { "loss": 2.835, "learning_rate": 3.6164431340263052e-06, "epoch": 3.206327367231093, "total_flos": 3060177723075287040, "step": 797000 }, { "loss": 2.8875, "learning_rate": 3.615632008630375e-06, "epoch": 3.2067296667752876, "total_flos": 3060579773490370560, "step": 797100 }, { "loss": 2.8775, "learning_rate": 3.614820883234444e-06, "epoch": 3.207131966319482, "total_flos": 3060966793089914880, "step": 797200 }, { "loss": 2.8925, "learning_rate": 3.6140097578385137e-06, "epoch": 3.2075342658636767, "total_flos": 3061338776562677760, "step": 797300 }, { "loss": 2.8575, "learning_rate": 3.613198632442583e-06, "epoch": 3.2079365654078713, "total_flos": 3061731999693158400, "step": 797400 }, { "loss": 2.8925, "learning_rate": 3.6123875070466525e-06, "epoch": 3.208338864952066, "total_flos": 3062099208359147520, "step": 797500 }, { "loss": 2.9375, "learning_rate": 3.6115763816507217e-06, "epoch": 3.2087411644962605, "total_flos": 3062473943055390720, "step": 797600 }, { "loss": 2.9625, "learning_rate": 3.6107652562547913e-06, "epoch": 3.209143464040455, "total_flos": 3062858434503628800, "step": 797700 }, { "loss": 2.8725, "learning_rate": 3.6099541308588605e-06, "epoch": 3.20954576358465, "total_flos": 3063234794439997440, "step": 797800 }, { "loss": 2.9275, "learning_rate": 3.60914300546293e-06, "epoch": 3.2099480631288446, "total_flos": 3063624586507991040, "step": 797900 }, { "loss": 2.8975, "learning_rate": 3.6083318800669993e-06, "epoch": 3.210350362673039, "total_flos": 3064013374751201280, "step": 798000 }, { "loss": 2.885, "learning_rate": 3.607520754671069e-06, "epoch": 3.2107526622172338, "total_flos": 3064405992400066560, "step": 798100 }, { "loss": 2.91, "learning_rate": 3.606709629275138e-06, "epoch": 3.2111549617614283, "total_flos": 3064784014755256320, "step": 798200 }, { "loss": 2.8975, "learning_rate": 3.6058985038792077e-06, "epoch": 3.211557261305623, "total_flos": 3065163402099701760, "step": 798300 }, { "loss": 2.8625, "learning_rate": 3.605087378483277e-06, "epoch": 3.2119595608498175, "total_flos": 3065530058396497920, "step": 798400 }, { "loss": 2.9175, "learning_rate": 3.6042762530873466e-06, "epoch": 3.212361860394012, "total_flos": 3065926415159900160, "step": 798500 }, { "loss": 2.93, "learning_rate": 3.6034651276914158e-06, "epoch": 3.2127641599382066, "total_flos": 3066320796141189120, "step": 798600 }, { "loss": 2.9175, "learning_rate": 3.6026540022954854e-06, "epoch": 3.213166459482401, "total_flos": 3066708909856634880, "step": 798700 }, { "loss": 2.8775, "learning_rate": 3.6018428768995546e-06, "epoch": 3.2135687590265962, "total_flos": 3067088849570273280, "step": 798800 }, { "loss": 2.905, "learning_rate": 3.601031751503624e-06, "epoch": 3.213971058570791, "total_flos": 3067461242008688640, "step": 798900 }, { "loss": 2.89, "learning_rate": 3.6002206261076934e-06, "epoch": 3.2143733581149854, "total_flos": 3067846030886492160, "step": 799000 }, { "loss": 2.84, "learning_rate": 3.599409500711763e-06, "epoch": 3.21477565765918, "total_flos": 3068231834211563520, "step": 799100 }, { "loss": 2.8475, "learning_rate": 3.598598375315832e-06, "epoch": 3.2151779572033745, "total_flos": 3068591984236615680, "step": 799200 }, { "loss": 2.9, "learning_rate": 3.597787249919902e-06, "epoch": 3.215580256747569, "total_flos": 3068986466131507200, "step": 799300 }, { "loss": 2.935, "learning_rate": 3.596976124523971e-06, "epoch": 3.2159825562917637, "total_flos": 3069376704343848960, "step": 799400 }, { "loss": 2.8675, "learning_rate": 3.5961649991280406e-06, "epoch": 3.2163848558359582, "total_flos": 3069751842694502400, "step": 799500 }, { "loss": 2.835, "learning_rate": 3.59535387373211e-06, "epoch": 3.216787155380153, "total_flos": 3070147748002314240, "step": 799600 }, { "loss": 2.9475, "learning_rate": 3.5945427483361795e-06, "epoch": 3.2171894549243474, "total_flos": 3070522057799178240, "step": 799700 }, { "loss": 2.9375, "learning_rate": 3.5937316229402486e-06, "epoch": 3.2175917544685424, "total_flos": 3070891210379827200, "step": 799800 }, { "loss": 2.905, "learning_rate": 3.5929204975443183e-06, "epoch": 3.217994054012737, "total_flos": 3071283307526952960, "step": 799900 }, { "loss": 2.96, "learning_rate": 3.5921093721483875e-06, "epoch": 3.2183963535569315, "total_flos": 3071663316286740480, "step": 800000 }, { "loss": 2.925, "learning_rate": 3.591298246752457e-06, "epoch": 3.218798653101126, "total_flos": 3072047048227338240, "step": 800100 }, { "loss": 2.895, "learning_rate": 3.5904871213565263e-06, "epoch": 3.2192009526453207, "total_flos": 3072432214203340800, "step": 800200 }, { "loss": 2.86, "learning_rate": 3.589675995960596e-06, "epoch": 3.2196032521895153, "total_flos": 3072802381231257600, "step": 800300 }, { "loss": 2.8425, "learning_rate": 3.588864870564665e-06, "epoch": 3.22000555173371, "total_flos": 3073192582264903680, "step": 800400 }, { "loss": 2.8825, "learning_rate": 3.5880537451687347e-06, "epoch": 3.2204078512779044, "total_flos": 3073575799015004160, "step": 800500 }, { "loss": 2.9425, "learning_rate": 3.587242619772804e-06, "epoch": 3.220810150822099, "total_flos": 3073948807557519360, "step": 800600 }, { "loss": 2.89, "learning_rate": 3.5864314943768735e-06, "epoch": 3.2212124503662936, "total_flos": 3074353949115586560, "step": 800700 }, { "loss": 2.865, "learning_rate": 3.5856203689809427e-06, "epoch": 3.221614749910488, "total_flos": 3074733426751150080, "step": 800800 }, { "loss": 2.895, "learning_rate": 3.5848092435850123e-06, "epoch": 3.222017049454683, "total_flos": 3075113201816279040, "step": 800900 }, { "loss": 2.94, "learning_rate": 3.5839981181890815e-06, "epoch": 3.2224193489988777, "total_flos": 3075486072266496000, "step": 801000 }, { "loss": 2.8525, "learning_rate": 3.583186992793151e-06, "epoch": 3.2228216485430723, "total_flos": 3075854125420001280, "step": 801100 }, { "loss": 2.9625, "learning_rate": 3.5823758673972204e-06, "epoch": 3.223223948087267, "total_flos": 3076265629846272000, "step": 801200 }, { "loss": 2.8775, "learning_rate": 3.58156474200129e-06, "epoch": 3.2236262476314614, "total_flos": 3076653754184202240, "step": 801300 }, { "loss": 2.8975, "learning_rate": 3.580753616605359e-06, "epoch": 3.224028547175656, "total_flos": 3077058539889039360, "step": 801400 }, { "loss": 2.9175, "learning_rate": 3.5799424912094288e-06, "epoch": 3.2244308467198506, "total_flos": 3077439047905597440, "step": 801500 }, { "loss": 2.9425, "learning_rate": 3.579131365813498e-06, "epoch": 3.224833146264045, "total_flos": 3077825764764334080, "step": 801600 }, { "loss": 2.9, "learning_rate": 3.5783202404175676e-06, "epoch": 3.2252354458082397, "total_flos": 3078206538343004160, "step": 801700 }, { "loss": 2.9025, "learning_rate": 3.5775091150216372e-06, "epoch": 3.2256377453524347, "total_flos": 3078582898279372800, "step": 801800 }, { "loss": 2.895, "learning_rate": 3.5766979896257064e-06, "epoch": 3.2260400448966293, "total_flos": 3078957909160212480, "step": 801900 }, { "loss": 2.8925, "learning_rate": 3.575886864229776e-06, "epoch": 3.226442344440824, "total_flos": 3079333435231549440, "step": 802000 }, { "loss": 2.91, "learning_rate": 3.5750757388338452e-06, "epoch": 3.2268446439850185, "total_flos": 3079719413827614720, "step": 802100 }, { "loss": 2.9525, "learning_rate": 3.574264613437915e-06, "epoch": 3.227246943529213, "total_flos": 3080095662227896320, "step": 802200 }, { "loss": 2.8575, "learning_rate": 3.573453488041984e-06, "epoch": 3.2276492430734076, "total_flos": 3080480690111600640, "step": 802300 }, { "loss": 2.875, "learning_rate": 3.5726423626460537e-06, "epoch": 3.228051542617602, "total_flos": 3080862063860643840, "step": 802400 }, { "loss": 2.9275, "learning_rate": 3.571831237250123e-06, "epoch": 3.2284538421617968, "total_flos": 3081246624355031040, "step": 802500 }, { "loss": 2.8475, "learning_rate": 3.5710201118541925e-06, "epoch": 3.2288561417059913, "total_flos": 3081625825805998080, "step": 802600 }, { "loss": 2.8625, "learning_rate": 3.5702089864582617e-06, "epoch": 3.229258441250186, "total_flos": 3082012808226846720, "step": 802700 }, { "loss": 2.88, "learning_rate": 3.5693978610623313e-06, "epoch": 3.2296607407943805, "total_flos": 3082419543157585920, "step": 802800 }, { "loss": 2.8625, "learning_rate": 3.5685867356664005e-06, "epoch": 3.2300630403385755, "total_flos": 3082802860821288960, "step": 802900 }, { "loss": 2.8925, "learning_rate": 3.56777561027047e-06, "epoch": 3.23046533988277, "total_flos": 3083191680931952640, "step": 803000 }, { "loss": 2.91, "learning_rate": 3.5669644848745393e-06, "epoch": 3.2308676394269646, "total_flos": 3083565268399872000, "step": 803100 }, { "loss": 2.9525, "learning_rate": 3.566153359478609e-06, "epoch": 3.231269938971159, "total_flos": 3083964275473152000, "step": 803200 }, { "loss": 2.86, "learning_rate": 3.565342234082678e-06, "epoch": 3.231672238515354, "total_flos": 3084338696806103040, "step": 803300 }, { "loss": 2.8825, "learning_rate": 3.5645311086867477e-06, "epoch": 3.2320745380595484, "total_flos": 3084719810304276480, "step": 803400 }, { "loss": 2.8825, "learning_rate": 3.563719983290817e-06, "epoch": 3.232476837603743, "total_flos": 3085102793359718400, "step": 803500 }, { "loss": 2.87, "learning_rate": 3.5629088578948866e-06, "epoch": 3.2328791371479375, "total_flos": 3085482164770437120, "step": 803600 }, { "loss": 2.825, "learning_rate": 3.5620977324989558e-06, "epoch": 3.233281436692132, "total_flos": 3085875924336384000, "step": 803700 }, { "loss": 2.92, "learning_rate": 3.5612866071030254e-06, "epoch": 3.2336837362363267, "total_flos": 3086269758259722240, "step": 803800 }, { "loss": 2.8675, "learning_rate": 3.5604754817070946e-06, "epoch": 3.2340860357805212, "total_flos": 3086649990091683840, "step": 803900 }, { "loss": 2.8825, "learning_rate": 3.559664356311164e-06, "epoch": 3.2344883353247162, "total_flos": 3087039782159677440, "step": 804000 }, { "loss": 2.9525, "learning_rate": 3.5588532309152334e-06, "epoch": 3.234890634868911, "total_flos": 3087439129152460800, "step": 804100 }, { "loss": 2.84, "learning_rate": 3.558042105519303e-06, "epoch": 3.2352929344131054, "total_flos": 3087816758475724800, "step": 804200 }, { "loss": 2.9075, "learning_rate": 3.557230980123372e-06, "epoch": 3.2356952339573, "total_flos": 3088211798051051520, "step": 804300 }, { "loss": 2.895, "learning_rate": 3.556419854727442e-06, "epoch": 3.2360975335014945, "total_flos": 3088584934063380480, "step": 804400 }, { "loss": 2.9025, "learning_rate": 3.555608729331511e-06, "epoch": 3.236499833045689, "total_flos": 3088971555319756800, "step": 804500 }, { "loss": 2.875, "learning_rate": 3.5547976039355806e-06, "epoch": 3.2369021325898837, "total_flos": 3089362531794769920, "step": 804600 }, { "loss": 2.9, "learning_rate": 3.55398647853965e-06, "epoch": 3.2373044321340783, "total_flos": 3089735306642626560, "step": 804700 }, { "loss": 2.8575, "learning_rate": 3.5531753531437195e-06, "epoch": 3.237706731678273, "total_flos": 3090111613466572800, "step": 804800 }, { "loss": 2.9175, "learning_rate": 3.5523642277477886e-06, "epoch": 3.238109031222468, "total_flos": 3090473532135290880, "step": 804900 }, { "loss": 2.8725, "learning_rate": 3.5515531023518583e-06, "epoch": 3.2385113307666624, "total_flos": 3090867907805337600, "step": 805000 }, { "loss": 2.875, "learning_rate": 3.5507419769559275e-06, "epoch": 3.238913630310857, "total_flos": 3091255501019043840, "step": 805100 }, { "loss": 2.8675, "learning_rate": 3.549930851559997e-06, "epoch": 3.2393159298550516, "total_flos": 3091646472182814720, "step": 805200 }, { "loss": 2.8475, "learning_rate": 3.5491197261640663e-06, "epoch": 3.239718229399246, "total_flos": 3092028286764963840, "step": 805300 }, { "loss": 2.89, "learning_rate": 3.548308600768136e-06, "epoch": 3.2401205289434407, "total_flos": 3092409405574379520, "step": 805400 }, { "loss": 2.935, "learning_rate": 3.547497475372205e-06, "epoch": 3.2405228284876353, "total_flos": 3092780172772669440, "step": 805500 }, { "loss": 2.905, "learning_rate": 3.5466863499762747e-06, "epoch": 3.24092512803183, "total_flos": 3093171117380229120, "step": 805600 }, { "loss": 2.865, "learning_rate": 3.545875224580344e-06, "epoch": 3.2413274275760244, "total_flos": 3093547556985231360, "step": 805700 }, { "loss": 2.8975, "learning_rate": 3.545064099184414e-06, "epoch": 3.241729727120219, "total_flos": 3093934958994216960, "step": 805800 }, { "loss": 2.9275, "learning_rate": 3.5442529737884827e-06, "epoch": 3.2421320266644136, "total_flos": 3094333217182341120, "step": 805900 }, { "loss": 2.86, "learning_rate": 3.5434418483925528e-06, "epoch": 3.2425343262086086, "total_flos": 3094703060224481280, "step": 806000 }, { "loss": 2.91, "learning_rate": 3.5426307229966215e-06, "epoch": 3.242936625752803, "total_flos": 3095073986760038400, "step": 806100 }, { "loss": 2.8675, "learning_rate": 3.5418195976006916e-06, "epoch": 3.2433389252969977, "total_flos": 3095450729105848320, "step": 806200 }, { "loss": 2.87, "learning_rate": 3.5410084722047604e-06, "epoch": 3.2437412248411923, "total_flos": 3095832655224084480, "step": 806300 }, { "loss": 2.88, "learning_rate": 3.5401973468088304e-06, "epoch": 3.244143524385387, "total_flos": 3096205291979642880, "step": 806400 }, { "loss": 2.89, "learning_rate": 3.5393862214129e-06, "epoch": 3.2445458239295815, "total_flos": 3096589087655147520, "step": 806500 }, { "loss": 2.8575, "learning_rate": 3.538575096016969e-06, "epoch": 3.244948123473776, "total_flos": 3096976372816803840, "step": 806600 }, { "loss": 2.89, "learning_rate": 3.537763970621039e-06, "epoch": 3.2453504230179706, "total_flos": 3097391504821524480, "step": 806700 }, { "loss": 2.925, "learning_rate": 3.536952845225108e-06, "epoch": 3.245752722562165, "total_flos": 3097773712435599360, "step": 806800 }, { "loss": 2.94, "learning_rate": 3.5361417198291776e-06, "epoch": 3.2461550221063598, "total_flos": 3098174047319439360, "step": 806900 }, { "loss": 2.9375, "learning_rate": 3.535330594433247e-06, "epoch": 3.2465573216505548, "total_flos": 3098554348197550080, "step": 807000 }, { "loss": 2.915, "learning_rate": 3.5345194690373165e-06, "epoch": 3.2469596211947493, "total_flos": 3098928089691494400, "step": 807100 }, { "loss": 2.925, "learning_rate": 3.5337083436413857e-06, "epoch": 3.247361920738944, "total_flos": 3099306956534200320, "step": 807200 }, { "loss": 2.8675, "learning_rate": 3.5328972182454553e-06, "epoch": 3.2477642202831385, "total_flos": 3099693630902999040, "step": 807300 }, { "loss": 2.8625, "learning_rate": 3.5320860928495245e-06, "epoch": 3.248166519827333, "total_flos": 3100087231131678720, "step": 807400 }, { "loss": 2.8875, "learning_rate": 3.531274967453594e-06, "epoch": 3.2485688193715276, "total_flos": 3100467616989665280, "step": 807500 }, { "loss": 2.8375, "learning_rate": 3.5304638420576633e-06, "epoch": 3.248971118915722, "total_flos": 3100842197659883520, "step": 807600 }, { "loss": 2.855, "learning_rate": 3.529652716661733e-06, "epoch": 3.249373418459917, "total_flos": 3101215296493516800, "step": 807700 }, { "loss": 2.895, "learning_rate": 3.528841591265802e-06, "epoch": 3.2497757180041114, "total_flos": 3101591985726904320, "step": 807800 }, { "loss": 2.9225, "learning_rate": 3.5280304658698717e-06, "epoch": 3.250178017548306, "total_flos": 3101966858515445760, "step": 807900 }, { "loss": 2.91, "learning_rate": 3.527219340473941e-06, "epoch": 3.250580317092501, "total_flos": 3102352523748218880, "step": 808000 }, { "loss": 2.9025, "learning_rate": 3.5264082150780105e-06, "epoch": 3.2509826166366955, "total_flos": 3102750303924541440, "step": 808100 }, { "loss": 2.885, "learning_rate": 3.5255970896820797e-06, "epoch": 3.25138491618089, "total_flos": 3103137015472035840, "step": 808200 }, { "loss": 2.9425, "learning_rate": 3.5247859642861494e-06, "epoch": 3.2517872157250847, "total_flos": 3103534391993948160, "step": 808300 }, { "loss": 2.8775, "learning_rate": 3.5239748388902185e-06, "epoch": 3.2521895152692792, "total_flos": 3103914544157276160, "step": 808400 }, { "loss": 2.885, "learning_rate": 3.523163713494288e-06, "epoch": 3.252591814813474, "total_flos": 3104283723294136320, "step": 808500 }, { "loss": 2.85, "learning_rate": 3.5223525880983574e-06, "epoch": 3.2529941143576684, "total_flos": 3104664103840880640, "step": 808600 }, { "loss": 2.8625, "learning_rate": 3.521541462702427e-06, "epoch": 3.253396413901863, "total_flos": 3105049338863032320, "step": 808700 }, { "loss": 2.925, "learning_rate": 3.520730337306496e-06, "epoch": 3.2537987134460575, "total_flos": 3105420445980825600, "step": 808800 }, { "loss": 2.9, "learning_rate": 3.519919211910566e-06, "epoch": 3.254201012990252, "total_flos": 3105814354261555200, "step": 808900 }, { "loss": 2.895, "learning_rate": 3.519108086514635e-06, "epoch": 3.2546033125344467, "total_flos": 3106184149502515200, "step": 809000 }, { "loss": 2.8825, "learning_rate": 3.5182969611187046e-06, "epoch": 3.2550056120786417, "total_flos": 3106552797515151360, "step": 809100 }, { "loss": 2.885, "learning_rate": 3.517485835722774e-06, "epoch": 3.2554079116228363, "total_flos": 3106941543268423680, "step": 809200 }, { "loss": 2.8575, "learning_rate": 3.5166747103268434e-06, "epoch": 3.255810211167031, "total_flos": 3107327362527221760, "step": 809300 }, { "loss": 2.865, "learning_rate": 3.5158635849309126e-06, "epoch": 3.2562125107112254, "total_flos": 3107702591168993280, "step": 809400 }, { "loss": 2.825, "learning_rate": 3.5150524595349822e-06, "epoch": 3.25661481025542, "total_flos": 3108085940700149760, "step": 809500 }, { "loss": 2.8925, "learning_rate": 3.5142413341390514e-06, "epoch": 3.2570171097996146, "total_flos": 3108470203764971520, "step": 809600 }, { "loss": 2.88, "learning_rate": 3.513430208743121e-06, "epoch": 3.257419409343809, "total_flos": 3108847349765191680, "step": 809700 }, { "loss": 2.8675, "learning_rate": 3.5126190833471903e-06, "epoch": 3.2578217088880037, "total_flos": 3109228872229017600, "step": 809800 }, { "loss": 2.945, "learning_rate": 3.51180795795126e-06, "epoch": 3.2582240084321983, "total_flos": 3109592113397053440, "step": 809900 }, { "loss": 2.925, "learning_rate": 3.510996832555329e-06, "epoch": 3.2586263079763933, "total_flos": 3109965477792798720, "step": 810000 }, { "loss": 2.895, "learning_rate": 3.5101857071593987e-06, "epoch": 3.259028607520588, "total_flos": 3110360389898311680, "step": 810100 }, { "loss": 2.8675, "learning_rate": 3.509374581763468e-06, "epoch": 3.2594309070647824, "total_flos": 3110744387401021440, "step": 810200 }, { "loss": 2.89, "learning_rate": 3.5085634563675375e-06, "epoch": 3.259833206608977, "total_flos": 3111108000356014080, "step": 810300 }, { "loss": 2.895, "learning_rate": 3.5077523309716067e-06, "epoch": 3.2602355061531716, "total_flos": 3111484886105364480, "step": 810400 }, { "loss": 2.875, "learning_rate": 3.5069412055756763e-06, "epoch": 3.260637805697366, "total_flos": 3111875389879818240, "step": 810500 }, { "loss": 2.8575, "learning_rate": 3.5061300801797455e-06, "epoch": 3.2610401052415607, "total_flos": 3112272378681047040, "step": 810600 }, { "loss": 2.8675, "learning_rate": 3.505318954783815e-06, "epoch": 3.2614424047857553, "total_flos": 3112665782393763840, "step": 810700 }, { "loss": 2.89, "learning_rate": 3.5045078293878843e-06, "epoch": 3.26184470432995, "total_flos": 3113035131490375680, "step": 810800 }, { "loss": 2.8975, "learning_rate": 3.503696703991954e-06, "epoch": 3.2622470038741445, "total_flos": 3113424355255449600, "step": 810900 }, { "loss": 2.9275, "learning_rate": 3.502885578596023e-06, "epoch": 3.262649303418339, "total_flos": 3113803625752565760, "step": 811000 }, { "loss": 2.875, "learning_rate": 3.5020744532000928e-06, "epoch": 3.263051602962534, "total_flos": 3114176358110484480, "step": 811100 }, { "loss": 2.8975, "learning_rate": 3.501263327804162e-06, "epoch": 3.2634539025067286, "total_flos": 3114569804313139200, "step": 811200 }, { "loss": 2.87, "learning_rate": 3.5004522024082316e-06, "epoch": 3.263856202050923, "total_flos": 3114963038066104320, "step": 811300 }, { "loss": 2.9225, "learning_rate": 3.499641077012301e-06, "epoch": 3.2642585015951178, "total_flos": 3115349096330803200, "step": 811400 }, { "loss": 2.95, "learning_rate": 3.4988299516163704e-06, "epoch": 3.2646608011393123, "total_flos": 3115733157568419840, "step": 811500 }, { "loss": 2.93, "learning_rate": 3.49801882622044e-06, "epoch": 3.265063100683507, "total_flos": 3116124033129830400, "step": 811600 }, { "loss": 2.91, "learning_rate": 3.497207700824509e-06, "epoch": 3.2654654002277015, "total_flos": 3116508556445521920, "step": 811700 }, { "loss": 2.9075, "learning_rate": 3.496396575428579e-06, "epoch": 3.265867699771896, "total_flos": 3116889515917670400, "step": 811800 }, { "loss": 2.8925, "learning_rate": 3.495585450032648e-06, "epoch": 3.2662699993160906, "total_flos": 3117276604563363840, "step": 811900 }, { "loss": 2.8425, "learning_rate": 3.4947743246367176e-06, "epoch": 3.266672298860285, "total_flos": 3117658817488680960, "step": 812000 }, { "loss": 2.87, "learning_rate": 3.493963199240787e-06, "epoch": 3.26707459840448, "total_flos": 3118035559834490880, "step": 812100 }, { "loss": 2.9325, "learning_rate": 3.4931520738448565e-06, "epoch": 3.267476897948675, "total_flos": 3118426398217205760, "step": 812200 }, { "loss": 2.9575, "learning_rate": 3.4923409484489257e-06, "epoch": 3.2678791974928694, "total_flos": 3118814453508986880, "step": 812300 }, { "loss": 2.9225, "learning_rate": 3.4915298230529953e-06, "epoch": 3.268281497037064, "total_flos": 3119174561044101120, "step": 812400 }, { "loss": 2.8525, "learning_rate": 3.4907186976570645e-06, "epoch": 3.2686837965812585, "total_flos": 3119576728306513920, "step": 812500 }, { "loss": 2.875, "learning_rate": 3.489907572261134e-06, "epoch": 3.269086096125453, "total_flos": 3119963248649287680, "step": 812600 }, { "loss": 2.905, "learning_rate": 3.4890964468652033e-06, "epoch": 3.2694883956696477, "total_flos": 3120351033067714560, "step": 812700 }, { "loss": 2.9075, "learning_rate": 3.488285321469273e-06, "epoch": 3.2698906952138422, "total_flos": 3120712388744755200, "step": 812800 }, { "loss": 2.91, "learning_rate": 3.487474196073342e-06, "epoch": 3.270292994758037, "total_flos": 3121105303823185920, "step": 812900 }, { "loss": 2.885, "learning_rate": 3.4866630706774117e-06, "epoch": 3.2706952943022314, "total_flos": 3121489710291548160, "step": 813000 }, { "loss": 2.94, "learning_rate": 3.485851945281481e-06, "epoch": 3.2710975938464264, "total_flos": 3121859717982197760, "step": 813100 }, { "loss": 2.9025, "learning_rate": 3.4850408198855505e-06, "epoch": 3.271499893390621, "total_flos": 3122237278259312640, "step": 813200 }, { "loss": 2.905, "learning_rate": 3.4842296944896197e-06, "epoch": 3.2719021929348155, "total_flos": 3122611827062077440, "step": 813300 }, { "loss": 2.89, "learning_rate": 3.4834185690936894e-06, "epoch": 3.27230449247901, "total_flos": 3123009925912934400, "step": 813400 }, { "loss": 2.8725, "learning_rate": 3.4826074436977585e-06, "epoch": 3.2727067920232047, "total_flos": 3123402432025712640, "step": 813500 }, { "loss": 2.8875, "learning_rate": 3.481796318301828e-06, "epoch": 3.2731090915673993, "total_flos": 3123790556363642880, "step": 813600 }, { "loss": 2.89, "learning_rate": 3.4809851929058974e-06, "epoch": 3.273511391111594, "total_flos": 3124179928843499520, "step": 813700 }, { "loss": 2.8625, "learning_rate": 3.480174067509967e-06, "epoch": 3.2739136906557884, "total_flos": 3124566066776832000, "step": 813800 }, { "loss": 2.9075, "learning_rate": 3.479362942114036e-06, "epoch": 3.274315990199983, "total_flos": 3124941210438727680, "step": 813900 }, { "loss": 2.8875, "learning_rate": 3.478551816718106e-06, "epoch": 3.2747182897441776, "total_flos": 3125337333507471360, "step": 814000 }, { "loss": 2.875, "learning_rate": 3.477740691322175e-06, "epoch": 3.275120589288372, "total_flos": 3125739474213672960, "step": 814100 }, { "loss": 2.84, "learning_rate": 3.4769295659262446e-06, "epoch": 3.275522888832567, "total_flos": 3126121968634828800, "step": 814200 }, { "loss": 2.8975, "learning_rate": 3.476118440530314e-06, "epoch": 3.2759251883767617, "total_flos": 3126500973569832960, "step": 814300 }, { "loss": 2.875, "learning_rate": 3.4753073151343834e-06, "epoch": 3.2763274879209563, "total_flos": 3126864931755571200, "step": 814400 }, { "loss": 2.8575, "learning_rate": 3.4744961897384526e-06, "epoch": 3.276729787465151, "total_flos": 3127246985343621120, "step": 814500 }, { "loss": 2.9175, "learning_rate": 3.4736850643425222e-06, "epoch": 3.2771320870093454, "total_flos": 3127617943746631680, "step": 814600 }, { "loss": 2.855, "learning_rate": 3.4728739389465914e-06, "epoch": 3.27753438655354, "total_flos": 3128001680998471680, "step": 814700 }, { "loss": 2.95, "learning_rate": 3.472062813550661e-06, "epoch": 3.2779366860977346, "total_flos": 3128387378098698240, "step": 814800 }, { "loss": 2.8825, "learning_rate": 3.4712516881547303e-06, "epoch": 3.278338985641929, "total_flos": 3128768358815815680, "step": 814900 }, { "loss": 2.9025, "learning_rate": 3.4704405627588e-06, "epoch": 3.2787412851861237, "total_flos": 3129146402415974400, "step": 815000 }, { "loss": 2.8775, "learning_rate": 3.469629437362869e-06, "epoch": 3.2791435847303188, "total_flos": 3129528641897502720, "step": 815100 }, { "loss": 2.8425, "learning_rate": 3.4688183119669387e-06, "epoch": 3.279545884274513, "total_flos": 3129911470926919680, "step": 815200 }, { "loss": 2.895, "learning_rate": 3.468007186571008e-06, "epoch": 3.279948183818708, "total_flos": 3130312644987033600, "step": 815300 }, { "loss": 2.8475, "learning_rate": 3.4671960611750775e-06, "epoch": 3.2803504833629025, "total_flos": 3130695882982103040, "step": 815400 }, { "loss": 2.8975, "learning_rate": 3.4663849357791467e-06, "epoch": 3.280752782907097, "total_flos": 3131073018359838720, "step": 815500 }, { "loss": 2.915, "learning_rate": 3.4655738103832163e-06, "epoch": 3.2811550824512916, "total_flos": 3131453706958632960, "step": 815600 }, { "loss": 2.89, "learning_rate": 3.4647626849872855e-06, "epoch": 3.281557381995486, "total_flos": 3131836488186869760, "step": 815700 }, { "loss": 2.89, "learning_rate": 3.463951559591355e-06, "epoch": 3.2819596815396808, "total_flos": 3132230741698344960, "step": 815800 }, { "loss": 2.9175, "learning_rate": 3.4631404341954243e-06, "epoch": 3.2823619810838753, "total_flos": 3132624915541186560, "step": 815900 }, { "loss": 2.8725, "learning_rate": 3.462329308799494e-06, "epoch": 3.28276428062807, "total_flos": 3133006427382528000, "step": 816000 }, { "loss": 2.9125, "learning_rate": 3.461518183403564e-06, "epoch": 3.2831665801722645, "total_flos": 3133391593358530560, "step": 816100 }, { "loss": 2.8725, "learning_rate": 3.4607070580076328e-06, "epoch": 3.2835688797164595, "total_flos": 3133777035519129600, "step": 816200 }, { "loss": 2.8975, "learning_rate": 3.459895932611703e-06, "epoch": 3.283971179260654, "total_flos": 3134154011559598080, "step": 816300 }, { "loss": 2.8975, "learning_rate": 3.4590848072157716e-06, "epoch": 3.2843734788048486, "total_flos": 3134535640248268800, "step": 816400 }, { "loss": 2.86, "learning_rate": 3.4582736818198416e-06, "epoch": 3.284775778349043, "total_flos": 3134924752477255680, "step": 816500 }, { "loss": 2.86, "learning_rate": 3.4574625564239104e-06, "epoch": 3.285178077893238, "total_flos": 3135304054841825280, "step": 816600 }, { "loss": 2.89, "learning_rate": 3.4566514310279804e-06, "epoch": 3.2855803774374324, "total_flos": 3135673929751418880, "step": 816700 }, { "loss": 2.855, "learning_rate": 3.455840305632049e-06, "epoch": 3.285982676981627, "total_flos": 3136053062156236800, "step": 816800 }, { "loss": 2.905, "learning_rate": 3.4550291802361193e-06, "epoch": 3.2863849765258215, "total_flos": 3136435769027082240, "step": 816900 }, { "loss": 2.8725, "learning_rate": 3.4542180548401884e-06, "epoch": 3.286787276070016, "total_flos": 3136820791599544320, "step": 817000 }, { "loss": 2.8825, "learning_rate": 3.453406929444258e-06, "epoch": 3.2871895756142107, "total_flos": 3137202558380513280, "step": 817100 }, { "loss": 2.8775, "learning_rate": 3.4525958040483273e-06, "epoch": 3.2875918751584052, "total_flos": 3137588807849932800, "step": 817200 }, { "loss": 2.88, "learning_rate": 3.451784678652397e-06, "epoch": 3.2879941747026002, "total_flos": 3137946668729579520, "step": 817300 }, { "loss": 2.8775, "learning_rate": 3.450973553256466e-06, "epoch": 3.288396474246795, "total_flos": 3138341580835092480, "step": 817400 }, { "loss": 2.8525, "learning_rate": 3.4501624278605357e-06, "epoch": 3.2887987737909894, "total_flos": 3138723836250347520, "step": 817500 }, { "loss": 2.88, "learning_rate": 3.449351302464605e-06, "epoch": 3.289201073335184, "total_flos": 3139115343849584640, "step": 817600 }, { "loss": 2.885, "learning_rate": 3.4485401770686745e-06, "epoch": 3.2896033728793785, "total_flos": 3139486620927129600, "step": 817700 }, { "loss": 2.8725, "learning_rate": 3.4477290516727437e-06, "epoch": 3.290005672423573, "total_flos": 3139860303997409280, "step": 817800 }, { "loss": 2.9075, "learning_rate": 3.4469179262768133e-06, "epoch": 3.2904079719677677, "total_flos": 3140239468269680640, "step": 817900 }, { "loss": 2.95, "learning_rate": 3.4461068008808825e-06, "epoch": 3.2908102715119623, "total_flos": 3140643377619548160, "step": 818000 }, { "loss": 2.87, "learning_rate": 3.445295675484952e-06, "epoch": 3.291212571056157, "total_flos": 3141029940452259840, "step": 818100 }, { "loss": 2.845, "learning_rate": 3.4444845500890213e-06, "epoch": 3.291614870600352, "total_flos": 3141398126386821120, "step": 818200 }, { "loss": 2.875, "learning_rate": 3.443673424693091e-06, "epoch": 3.292017170144546, "total_flos": 3141773679014369280, "step": 818300 }, { "loss": 2.8125, "learning_rate": 3.44286229929716e-06, "epoch": 3.292419469688741, "total_flos": 3142163996895344640, "step": 818400 }, { "loss": 2.8775, "learning_rate": 3.4420511739012298e-06, "epoch": 3.2928217692329356, "total_flos": 3142550639396689920, "step": 818500 }, { "loss": 2.88, "learning_rate": 3.441240048505299e-06, "epoch": 3.29322406877713, "total_flos": 3142935040553809920, "step": 818600 }, { "loss": 2.855, "learning_rate": 3.4404289231093686e-06, "epoch": 3.2936263683213247, "total_flos": 3143308102208747520, "step": 818700 }, { "loss": 2.9, "learning_rate": 3.4396177977134378e-06, "epoch": 3.2940286678655193, "total_flos": 3143695169609472000, "step": 818800 }, { "loss": 2.845, "learning_rate": 3.4388066723175074e-06, "epoch": 3.294430967409714, "total_flos": 3144066760050309120, "step": 818900 }, { "loss": 2.8725, "learning_rate": 3.4379955469215766e-06, "epoch": 3.2948332669539084, "total_flos": 3144435673625057280, "step": 819000 }, { "loss": 2.925, "learning_rate": 3.4371844215256462e-06, "epoch": 3.295235566498103, "total_flos": 3144819692372736000, "step": 819100 }, { "loss": 2.935, "learning_rate": 3.4363732961297154e-06, "epoch": 3.2956378660422976, "total_flos": 3145198904446187520, "step": 819200 }, { "loss": 2.9525, "learning_rate": 3.435562170733785e-06, "epoch": 3.2960401655864926, "total_flos": 3145572316643112960, "step": 819300 }, { "loss": 2.855, "learning_rate": 3.4347510453378542e-06, "epoch": 3.296442465130687, "total_flos": 3145964530637568000, "step": 819400 }, { "loss": 2.8475, "learning_rate": 3.433939919941924e-06, "epoch": 3.2968447646748817, "total_flos": 3146359989801031680, "step": 819500 }, { "loss": 2.8725, "learning_rate": 3.433128794545993e-06, "epoch": 3.2972470642190763, "total_flos": 3146741172345354240, "step": 819600 }, { "loss": 2.8625, "learning_rate": 3.4323176691500627e-06, "epoch": 3.297649363763271, "total_flos": 3147135856067450880, "step": 819700 }, { "loss": 2.8675, "learning_rate": 3.431506543754132e-06, "epoch": 3.2980516633074655, "total_flos": 3147527592050104320, "step": 819800 }, { "loss": 2.885, "learning_rate": 3.4306954183582015e-06, "epoch": 3.29845396285166, "total_flos": 3147907855749519360, "step": 819900 }, { "loss": 2.8675, "learning_rate": 3.4298842929622707e-06, "epoch": 3.2988562623958546, "total_flos": 3148278065267374080, "step": 820000 }, { "loss": 2.8125, "learning_rate": 3.4290731675663403e-06, "epoch": 3.299258561940049, "total_flos": 3148658759177410560, "step": 820100 }, { "loss": 2.8675, "learning_rate": 3.4282620421704095e-06, "epoch": 3.2996608614842438, "total_flos": 3149044020755773440, "step": 820200 }, { "loss": 2.8925, "learning_rate": 3.427450916774479e-06, "epoch": 3.3000631610284383, "total_flos": 3149425861894133760, "step": 820300 }, { "loss": 2.8725, "learning_rate": 3.4266397913785483e-06, "epoch": 3.3004654605726333, "total_flos": 3149800070777395200, "step": 820400 }, { "loss": 2.9125, "learning_rate": 3.425828665982618e-06, "epoch": 3.300867760116828, "total_flos": 3150186585808926720, "step": 820500 }, { "loss": 2.9, "learning_rate": 3.425017540586687e-06, "epoch": 3.3012700596610225, "total_flos": 3150579649602140160, "step": 820600 }, { "loss": 2.885, "learning_rate": 3.4242064151907567e-06, "epoch": 3.301672359205217, "total_flos": 3150973791577528320, "step": 820700 }, { "loss": 2.87, "learning_rate": 3.4233952897948264e-06, "epoch": 3.3020746587494116, "total_flos": 3151338721720596480, "step": 820800 }, { "loss": 2.8675, "learning_rate": 3.4225841643988956e-06, "epoch": 3.302476958293606, "total_flos": 3151727462162626560, "step": 820900 }, { "loss": 2.825, "learning_rate": 3.421773039002965e-06, "epoch": 3.302879257837801, "total_flos": 3152107497478625280, "step": 821000 }, { "loss": 2.91, "learning_rate": 3.4209619136070344e-06, "epoch": 3.3032815573819954, "total_flos": 3152481297396234240, "step": 821100 }, { "loss": 2.8725, "learning_rate": 3.420150788211104e-06, "epoch": 3.30368385692619, "total_flos": 3152862857038755840, "step": 821200 }, { "loss": 2.89, "learning_rate": 3.419339662815173e-06, "epoch": 3.304086156470385, "total_flos": 3153274600470927360, "step": 821300 }, { "loss": 2.865, "learning_rate": 3.418528537419243e-06, "epoch": 3.3044884560145795, "total_flos": 3153649250187294720, "step": 821400 }, { "loss": 2.935, "learning_rate": 3.417717412023312e-06, "epoch": 3.304890755558774, "total_flos": 3154031983614351360, "step": 821500 }, { "loss": 2.8775, "learning_rate": 3.4169062866273816e-06, "epoch": 3.3052930551029687, "total_flos": 3154410229041715200, "step": 821600 }, { "loss": 2.895, "learning_rate": 3.416095161231451e-06, "epoch": 3.3056953546471632, "total_flos": 3154788182350755840, "step": 821700 }, { "loss": 2.8575, "learning_rate": 3.4152840358355204e-06, "epoch": 3.306097654191358, "total_flos": 3155190291189504000, "step": 821800 }, { "loss": 2.8525, "learning_rate": 3.4144729104395896e-06, "epoch": 3.3064999537355524, "total_flos": 3155588597178808320, "step": 821900 }, { "loss": 2.845, "learning_rate": 3.4136617850436593e-06, "epoch": 3.306902253279747, "total_flos": 3155966454885488640, "step": 822000 }, { "loss": 2.885, "learning_rate": 3.4128506596477284e-06, "epoch": 3.3073045528239415, "total_flos": 3156354573912176640, "step": 822100 }, { "loss": 2.85, "learning_rate": 3.412039534251798e-06, "epoch": 3.307706852368136, "total_flos": 3156719015420958720, "step": 822200 }, { "loss": 2.91, "learning_rate": 3.4112284088558673e-06, "epoch": 3.3081091519123307, "total_flos": 3157098593970124800, "step": 822300 }, { "loss": 2.8575, "learning_rate": 3.410417283459937e-06, "epoch": 3.3085114514565257, "total_flos": 3157491471869859840, "step": 822400 }, { "loss": 2.91, "learning_rate": 3.409606158064006e-06, "epoch": 3.3089137510007203, "total_flos": 3157887504647485440, "step": 822500 }, { "loss": 2.8375, "learning_rate": 3.4087950326680757e-06, "epoch": 3.309316050544915, "total_flos": 3158252227652106240, "step": 822600 }, { "loss": 2.925, "learning_rate": 3.407983907272145e-06, "epoch": 3.3097183500891094, "total_flos": 3158644409779107840, "step": 822700 }, { "loss": 2.855, "learning_rate": 3.4071727818762145e-06, "epoch": 3.310120649633304, "total_flos": 3159027977071196160, "step": 822800 }, { "loss": 2.8425, "learning_rate": 3.4063616564802837e-06, "epoch": 3.3105229491774986, "total_flos": 3159412813750179840, "step": 822900 }, { "loss": 2.885, "learning_rate": 3.4055505310843533e-06, "epoch": 3.310925248721693, "total_flos": 3159783405677475840, "step": 823000 }, { "loss": 2.895, "learning_rate": 3.4047394056884225e-06, "epoch": 3.3113275482658877, "total_flos": 3160163834025400320, "step": 823100 }, { "loss": 2.8975, "learning_rate": 3.403928280292492e-06, "epoch": 3.3117298478100823, "total_flos": 3160554274064947200, "step": 823200 }, { "loss": 2.87, "learning_rate": 3.4031171548965613e-06, "epoch": 3.312132147354277, "total_flos": 3160936417944115200, "step": 823300 }, { "loss": 2.8825, "learning_rate": 3.402306029500631e-06, "epoch": 3.3125344468984714, "total_flos": 3161313144356198400, "step": 823400 }, { "loss": 2.92, "learning_rate": 3.4014949041047e-06, "epoch": 3.3129367464426664, "total_flos": 3161700073664624640, "step": 823500 }, { "loss": 2.9025, "learning_rate": 3.4006837787087698e-06, "epoch": 3.313339045986861, "total_flos": 3162093530489763840, "step": 823600 }, { "loss": 2.8825, "learning_rate": 3.399872653312839e-06, "epoch": 3.3137413455310556, "total_flos": 3162476120513280000, "step": 823700 }, { "loss": 2.8825, "learning_rate": 3.3990615279169086e-06, "epoch": 3.31414364507525, "total_flos": 3162856426702632960, "step": 823800 }, { "loss": 2.92, "learning_rate": 3.3982504025209778e-06, "epoch": 3.3145459446194447, "total_flos": 3163230072594216960, "step": 823900 }, { "loss": 2.8975, "learning_rate": 3.3974392771250474e-06, "epoch": 3.3149482441636393, "total_flos": 3163642596778997760, "step": 824000 }, { "loss": 2.8975, "learning_rate": 3.3966281517291166e-06, "epoch": 3.315350543707834, "total_flos": 3164025202736240640, "step": 824100 }, { "loss": 2.8625, "learning_rate": 3.3958170263331862e-06, "epoch": 3.3157528432520285, "total_flos": 3164412785327462400, "step": 824200 }, { "loss": 2.89, "learning_rate": 3.3950059009372554e-06, "epoch": 3.316155142796223, "total_flos": 3164798142508185600, "step": 824300 }, { "loss": 2.8675, "learning_rate": 3.394194775541325e-06, "epoch": 3.316557442340418, "total_flos": 3165182798604933120, "step": 824400 }, { "loss": 2.8725, "learning_rate": 3.3933836501453942e-06, "epoch": 3.3169597418846126, "total_flos": 3165566705816524800, "step": 824500 }, { "loss": 2.8925, "learning_rate": 3.392572524749464e-06, "epoch": 3.317362041428807, "total_flos": 3165942003504445440, "step": 824600 }, { "loss": 2.855, "learning_rate": 3.391761399353533e-06, "epoch": 3.3177643409730018, "total_flos": 3166316531062241280, "step": 824700 }, { "loss": 2.93, "learning_rate": 3.3909502739576027e-06, "epoch": 3.3181666405171963, "total_flos": 3166693188428175360, "step": 824800 }, { "loss": 2.8575, "learning_rate": 3.390139148561672e-06, "epoch": 3.318568940061391, "total_flos": 3167088488254371840, "step": 824900 }, { "loss": 2.925, "learning_rate": 3.3893280231657415e-06, "epoch": 3.3189712396055855, "total_flos": 3167457444319057920, "step": 825000 }, { "loss": 2.89, "learning_rate": 3.3885168977698107e-06, "epoch": 3.31937353914978, "total_flos": 3167829273765795840, "step": 825100 }, { "loss": 2.89, "learning_rate": 3.3877057723738803e-06, "epoch": 3.3197758386939746, "total_flos": 3168208023761172480, "step": 825200 }, { "loss": 2.89, "learning_rate": 3.3868946469779495e-06, "epoch": 3.320178138238169, "total_flos": 3168592706414131200, "step": 825300 }, { "loss": 2.8675, "learning_rate": 3.386083521582019e-06, "epoch": 3.320580437782364, "total_flos": 3168978026416158720, "step": 825400 }, { "loss": 2.9125, "learning_rate": 3.3852723961860887e-06, "epoch": 3.320982737326559, "total_flos": 3169366341958809600, "step": 825500 }, { "loss": 2.855, "learning_rate": 3.384461270790158e-06, "epoch": 3.3213850368707534, "total_flos": 3169743891613440000, "step": 825600 }, { "loss": 2.93, "learning_rate": 3.3836501453942275e-06, "epoch": 3.321787336414948, "total_flos": 3170102942211348480, "step": 825700 }, { "loss": 2.9025, "learning_rate": 3.3828390199982967e-06, "epoch": 3.3221896359591425, "total_flos": 3170490280485427200, "step": 825800 }, { "loss": 2.855, "learning_rate": 3.3820278946023668e-06, "epoch": 3.322591935503337, "total_flos": 3170874888780994560, "step": 825900 }, { "loss": 2.865, "learning_rate": 3.3812167692064356e-06, "epoch": 3.3229942350475317, "total_flos": 3171251944490096640, "step": 826000 }, { "loss": 2.845, "learning_rate": 3.3804056438105056e-06, "epoch": 3.3233965345917262, "total_flos": 3171645156998092800, "step": 826100 }, { "loss": 2.855, "learning_rate": 3.3795945184145744e-06, "epoch": 3.323798834135921, "total_flos": 3172013863434393600, "step": 826200 }, { "loss": 2.8825, "learning_rate": 3.3787833930186444e-06, "epoch": 3.3242011336801154, "total_flos": 3172407160922265600, "step": 826300 }, { "loss": 2.86, "learning_rate": 3.377972267622713e-06, "epoch": 3.3246034332243104, "total_flos": 3172798487939266560, "step": 826400 }, { "loss": 2.9575, "learning_rate": 3.3771611422267832e-06, "epoch": 3.3250057327685045, "total_flos": 3173177413205637120, "step": 826500 }, { "loss": 2.8875, "learning_rate": 3.376350016830852e-06, "epoch": 3.3254080323126995, "total_flos": 3173563784833628160, "step": 826600 }, { "loss": 2.8225, "learning_rate": 3.375538891434922e-06, "epoch": 3.325810331856894, "total_flos": 3173950225507768320, "step": 826700 }, { "loss": 2.91, "learning_rate": 3.374727766038991e-06, "epoch": 3.3262126314010887, "total_flos": 3174327971678361600, "step": 826800 }, { "loss": 2.9125, "learning_rate": 3.373916640643061e-06, "epoch": 3.3266149309452833, "total_flos": 3174703141896468480, "step": 826900 }, { "loss": 2.92, "learning_rate": 3.3731055152471296e-06, "epoch": 3.327017230489478, "total_flos": 3175077563229419520, "step": 827000 }, { "loss": 2.9, "learning_rate": 3.3722943898511997e-06, "epoch": 3.3274195300336724, "total_flos": 3175448845618206720, "step": 827100 }, { "loss": 2.82, "learning_rate": 3.3714832644552684e-06, "epoch": 3.327821829577867, "total_flos": 3175830538041784320, "step": 827200 }, { "loss": 2.8925, "learning_rate": 3.3706721390593385e-06, "epoch": 3.3282241291220616, "total_flos": 3176211136349460480, "step": 827300 }, { "loss": 2.9625, "learning_rate": 3.3698610136634073e-06, "epoch": 3.328626428666256, "total_flos": 3176597624824780800, "step": 827400 }, { "loss": 2.86, "learning_rate": 3.3690498882674773e-06, "epoch": 3.329028728210451, "total_flos": 3176972269229905920, "step": 827500 }, { "loss": 2.845, "learning_rate": 3.368238762871546e-06, "epoch": 3.3294310277546457, "total_flos": 3177360228919326720, "step": 827600 }, { "loss": 2.8425, "learning_rate": 3.367427637475616e-06, "epoch": 3.3298333272988403, "total_flos": 3177743360689551360, "step": 827700 }, { "loss": 2.8875, "learning_rate": 3.366616512079685e-06, "epoch": 3.330235626843035, "total_flos": 3178115142335109120, "step": 827800 }, { "loss": 2.9125, "learning_rate": 3.365805386683755e-06, "epoch": 3.3306379263872294, "total_flos": 3178501232467261440, "step": 827900 }, { "loss": 2.9125, "learning_rate": 3.3649942612878237e-06, "epoch": 3.331040225931424, "total_flos": 3178868053412567040, "step": 828000 }, { "loss": 2.895, "learning_rate": 3.3641831358918938e-06, "epoch": 3.3314425254756186, "total_flos": 3179249209400678400, "step": 828100 }, { "loss": 2.905, "learning_rate": 3.3633720104959625e-06, "epoch": 3.331844825019813, "total_flos": 3179636165265315840, "step": 828200 }, { "loss": 2.865, "learning_rate": 3.3625608851000326e-06, "epoch": 3.3322471245640077, "total_flos": 3180031194218158080, "step": 828300 }, { "loss": 2.8675, "learning_rate": 3.3617497597041018e-06, "epoch": 3.3326494241082023, "total_flos": 3180403400763095040, "step": 828400 }, { "loss": 2.875, "learning_rate": 3.3609386343081714e-06, "epoch": 3.333051723652397, "total_flos": 3180803443528611840, "step": 828500 }, { "loss": 2.885, "learning_rate": 3.3601275089122406e-06, "epoch": 3.333454023196592, "total_flos": 3181193644562257920, "step": 828600 }, { "loss": 2.8475, "learning_rate": 3.35931638351631e-06, "epoch": 3.3338563227407865, "total_flos": 3181572867258193920, "step": 828700 }, { "loss": 2.87, "learning_rate": 3.3585052581203794e-06, "epoch": 3.334258622284981, "total_flos": 3181961055331031040, "step": 828800 }, { "loss": 2.9175, "learning_rate": 3.357694132724449e-06, "epoch": 3.3346609218291756, "total_flos": 3182348090864302080, "step": 828900 }, { "loss": 2.8625, "learning_rate": 3.356883007328518e-06, "epoch": 3.33506322137337, "total_flos": 3182724456111912960, "step": 829000 }, { "loss": 2.94, "learning_rate": 3.356071881932588e-06, "epoch": 3.3354655209175648, "total_flos": 3183121152794818560, "step": 829100 }, { "loss": 2.9025, "learning_rate": 3.355260756536657e-06, "epoch": 3.3358678204617593, "total_flos": 3183521429254993920, "step": 829200 }, { "loss": 2.8925, "learning_rate": 3.3544496311407266e-06, "epoch": 3.336270120005954, "total_flos": 3183914562094356480, "step": 829300 }, { "loss": 2.895, "learning_rate": 3.353638505744796e-06, "epoch": 3.3366724195501485, "total_flos": 3184297274276444160, "step": 829400 }, { "loss": 2.8925, "learning_rate": 3.3528273803488655e-06, "epoch": 3.3370747190943435, "total_flos": 3184680236086917120, "step": 829500 }, { "loss": 2.9025, "learning_rate": 3.3520162549529347e-06, "epoch": 3.337477018638538, "total_flos": 3185064870938695680, "step": 829600 }, { "loss": 2.895, "learning_rate": 3.3512051295570043e-06, "epoch": 3.3378793181827326, "total_flos": 3185444810652334080, "step": 829700 }, { "loss": 2.8625, "learning_rate": 3.3503940041610735e-06, "epoch": 3.338281617726927, "total_flos": 3185817691725035520, "step": 829800 }, { "loss": 2.9025, "learning_rate": 3.349582878765143e-06, "epoch": 3.338683917271122, "total_flos": 3186205162780170240, "step": 829900 }, { "loss": 2.95, "learning_rate": 3.3487717533692123e-06, "epoch": 3.3390862168153164, "total_flos": 3186594338744064000, "step": 830000 }, { "loss": 2.88, "learning_rate": 3.347960627973282e-06, "epoch": 3.339488516359511, "total_flos": 3186978909860935680, "step": 830100 }, { "loss": 2.8975, "learning_rate": 3.3471495025773515e-06, "epoch": 3.3398908159037055, "total_flos": 3187372223282534400, "step": 830200 }, { "loss": 2.8375, "learning_rate": 3.3463383771814207e-06, "epoch": 3.3402931154479, "total_flos": 3187756167672821760, "step": 830300 }, { "loss": 2.86, "learning_rate": 3.3455272517854903e-06, "epoch": 3.3406954149920947, "total_flos": 3188138980768512000, "step": 830400 }, { "loss": 2.8575, "learning_rate": 3.3447161263895595e-06, "epoch": 3.3410977145362892, "total_flos": 3188510709301647360, "step": 830500 }, { "loss": 2.88, "learning_rate": 3.343905000993629e-06, "epoch": 3.3415000140804842, "total_flos": 3188879994663352320, "step": 830600 }, { "loss": 2.88, "learning_rate": 3.3430938755976984e-06, "epoch": 3.341902313624679, "total_flos": 3189256524559472640, "step": 830700 }, { "loss": 2.9025, "learning_rate": 3.342282750201768e-06, "epoch": 3.3423046131688734, "total_flos": 3189628816084285440, "step": 830800 }, { "loss": 2.8625, "learning_rate": 3.341471624805837e-06, "epoch": 3.342706912713068, "total_flos": 3190017652128675840, "step": 830900 }, { "loss": 2.88, "learning_rate": 3.3406604994099068e-06, "epoch": 3.3431092122572625, "total_flos": 3190407130833377280, "step": 831000 }, { "loss": 2.895, "learning_rate": 3.339849374013976e-06, "epoch": 3.343511511801457, "total_flos": 3190796545803171840, "step": 831100 }, { "loss": 2.9125, "learning_rate": 3.3390382486180456e-06, "epoch": 3.3439138113456517, "total_flos": 3191178769350973440, "step": 831200 }, { "loss": 2.8175, "learning_rate": 3.338227123222115e-06, "epoch": 3.3443161108898463, "total_flos": 3191563712254801920, "step": 831300 }, { "loss": 2.865, "learning_rate": 3.3374159978261844e-06, "epoch": 3.344718410434041, "total_flos": 3191951964062545920, "step": 831400 }, { "loss": 2.905, "learning_rate": 3.3366048724302536e-06, "epoch": 3.3451207099782354, "total_flos": 3192323076491581440, "step": 831500 }, { "loss": 2.95, "learning_rate": 3.3357937470343232e-06, "epoch": 3.34552300952243, "total_flos": 3192721802069022720, "step": 831600 }, { "loss": 2.85, "learning_rate": 3.3349826216383924e-06, "epoch": 3.345925309066625, "total_flos": 3193107621327820800, "step": 831700 }, { "loss": 2.885, "learning_rate": 3.334171496242462e-06, "epoch": 3.3463276086108196, "total_flos": 3193478346036172800, "step": 831800 }, { "loss": 2.9175, "learning_rate": 3.3333603708465312e-06, "epoch": 3.346729908155014, "total_flos": 3193843961329489920, "step": 831900 }, { "loss": 2.9375, "learning_rate": 3.332549245450601e-06, "epoch": 3.3471322076992087, "total_flos": 3194233216962017280, "step": 832000 }, { "loss": 2.86, "learning_rate": 3.33173812005467e-06, "epoch": 3.3475345072434033, "total_flos": 3194627666989455360, "step": 832100 }, { "loss": 2.8725, "learning_rate": 3.3309269946587397e-06, "epoch": 3.347936806787598, "total_flos": 3195010310125393920, "step": 832200 }, { "loss": 2.9175, "learning_rate": 3.330115869262809e-06, "epoch": 3.3483391063317924, "total_flos": 3195405742732646400, "step": 832300 }, { "loss": 2.9225, "learning_rate": 3.3293047438668785e-06, "epoch": 3.348741405875987, "total_flos": 3195792895113246720, "step": 832400 }, { "loss": 2.895, "learning_rate": 3.3284936184709477e-06, "epoch": 3.3491437054201816, "total_flos": 3196174173259929600, "step": 832500 }, { "loss": 2.8725, "learning_rate": 3.3276824930750173e-06, "epoch": 3.3495460049643766, "total_flos": 3196567279543080960, "step": 832600 }, { "loss": 2.8475, "learning_rate": 3.3268713676790865e-06, "epoch": 3.349948304508571, "total_flos": 3196964130252011520, "step": 832700 }, { "loss": 2.9175, "learning_rate": 3.326060242283156e-06, "epoch": 3.3503506040527657, "total_flos": 3197349168758200320, "step": 832800 }, { "loss": 2.8725, "learning_rate": 3.3252491168872253e-06, "epoch": 3.3507529035969603, "total_flos": 3197723558223697920, "step": 832900 }, { "loss": 2.8625, "learning_rate": 3.324437991491295e-06, "epoch": 3.351155203141155, "total_flos": 3198106982112245760, "step": 833000 }, { "loss": 2.8825, "learning_rate": 3.323626866095364e-06, "epoch": 3.3515575026853495, "total_flos": 3198478264501032960, "step": 833100 }, { "loss": 2.8575, "learning_rate": 3.3228157406994338e-06, "epoch": 3.351959802229544, "total_flos": 3198868656739399680, "step": 833200 }, { "loss": 2.9125, "learning_rate": 3.322004615303503e-06, "epoch": 3.3523621017737386, "total_flos": 3199248245911050240, "step": 833300 }, { "loss": 2.8525, "learning_rate": 3.3211934899075726e-06, "epoch": 3.352764401317933, "total_flos": 3199640013761157120, "step": 833400 }, { "loss": 2.895, "learning_rate": 3.3203823645116418e-06, "epoch": 3.3531667008621278, "total_flos": 3200024372428339200, "step": 833500 }, { "loss": 2.88, "learning_rate": 3.3195712391157114e-06, "epoch": 3.3535690004063223, "total_flos": 3200430639969761280, "step": 833600 }, { "loss": 2.8725, "learning_rate": 3.3187601137197806e-06, "epoch": 3.3539712999505173, "total_flos": 3200806665297868800, "step": 833700 }, { "loss": 2.86, "learning_rate": 3.31794898832385e-06, "epoch": 3.354373599494712, "total_flos": 3201186472230451200, "step": 833800 }, { "loss": 2.845, "learning_rate": 3.3171378629279194e-06, "epoch": 3.3547758990389065, "total_flos": 3201580242418882560, "step": 833900 }, { "loss": 2.9375, "learning_rate": 3.316326737531989e-06, "epoch": 3.355178198583101, "total_flos": 3201954122005125120, "step": 834000 }, { "loss": 2.87, "learning_rate": 3.315515612136058e-06, "epoch": 3.3555804981272956, "total_flos": 3202340541434296320, "step": 834100 }, { "loss": 2.9575, "learning_rate": 3.314704486740128e-06, "epoch": 3.35598279767149, "total_flos": 3202725479026882560, "step": 834200 }, { "loss": 2.84, "learning_rate": 3.313893361344197e-06, "epoch": 3.356385097215685, "total_flos": 3203100898873374720, "step": 834300 }, { "loss": 2.875, "learning_rate": 3.3130822359482666e-06, "epoch": 3.3567873967598794, "total_flos": 3203495614462924800, "step": 834400 }, { "loss": 2.9375, "learning_rate": 3.312271110552336e-06, "epoch": 3.357189696304074, "total_flos": 3203901133119191040, "step": 834500 }, { "loss": 2.9025, "learning_rate": 3.3114599851564055e-06, "epoch": 3.357591995848269, "total_flos": 3204280594821027840, "step": 834600 }, { "loss": 2.9, "learning_rate": 3.3106488597604747e-06, "epoch": 3.357994295392463, "total_flos": 3204669213104486400, "step": 834700 }, { "loss": 2.8225, "learning_rate": 3.3098377343645443e-06, "epoch": 3.358396594936658, "total_flos": 3205061676727326720, "step": 834800 }, { "loss": 2.915, "learning_rate": 3.3090266089686135e-06, "epoch": 3.3587988944808527, "total_flos": 3205442960185251840, "step": 834900 }, { "loss": 2.8375, "learning_rate": 3.308215483572683e-06, "epoch": 3.3592011940250472, "total_flos": 3205836687883745280, "step": 835000 }, { "loss": 2.86, "learning_rate": 3.3074043581767527e-06, "epoch": 3.359603493569242, "total_flos": 3206228689428510720, "step": 835100 }, { "loss": 2.8725, "learning_rate": 3.306593232780822e-06, "epoch": 3.3600057931134364, "total_flos": 3206593438989342720, "step": 835200 }, { "loss": 2.89, "learning_rate": 3.3057821073848915e-06, "epoch": 3.360408092657631, "total_flos": 3206965634911795200, "step": 835300 }, { "loss": 2.915, "learning_rate": 3.3049709819889607e-06, "epoch": 3.3608103922018255, "total_flos": 3207345967657359360, "step": 835400 }, { "loss": 2.85, "learning_rate": 3.3041598565930303e-06, "epoch": 3.36121269174602, "total_flos": 3207732153391872000, "step": 835500 }, { "loss": 2.8725, "learning_rate": 3.3033487311970995e-06, "epoch": 3.3616149912902147, "total_flos": 3208105740859791360, "step": 835600 }, { "loss": 2.8975, "learning_rate": 3.302537605801169e-06, "epoch": 3.3620172908344097, "total_flos": 3208497880496855040, "step": 835700 }, { "loss": 2.92, "learning_rate": 3.3017264804052384e-06, "epoch": 3.3624195903786043, "total_flos": 3208898773061130240, "step": 835800 }, { "loss": 2.8825, "learning_rate": 3.300915355009308e-06, "epoch": 3.362821889922799, "total_flos": 3209304424498452480, "step": 835900 }, { "loss": 2.8625, "learning_rate": 3.300104229613377e-06, "epoch": 3.3632241894669934, "total_flos": 3209687104813086720, "step": 836000 }, { "loss": 2.8675, "learning_rate": 3.2992931042174468e-06, "epoch": 3.363626489011188, "total_flos": 3210079950845368320, "step": 836100 }, { "loss": 2.86, "learning_rate": 3.298481978821516e-06, "epoch": 3.3640287885553826, "total_flos": 3210447573788252160, "step": 836200 }, { "loss": 2.925, "learning_rate": 3.2976708534255856e-06, "epoch": 3.364431088099577, "total_flos": 3210836149581772800, "step": 836300 }, { "loss": 2.925, "learning_rate": 3.296859728029655e-06, "epoch": 3.3648333876437717, "total_flos": 3211222855818024960, "step": 836400 }, { "loss": 2.8725, "learning_rate": 3.2960486026337244e-06, "epoch": 3.3652356871879663, "total_flos": 3211614474953349120, "step": 836500 }, { "loss": 2.8575, "learning_rate": 3.2952374772377936e-06, "epoch": 3.365637986732161, "total_flos": 3211990888002140160, "step": 836600 }, { "loss": 2.815, "learning_rate": 3.2944263518418632e-06, "epoch": 3.3660402862763554, "total_flos": 3212383654365788160, "step": 836700 }, { "loss": 2.865, "learning_rate": 3.2936152264459324e-06, "epoch": 3.3664425858205504, "total_flos": 3212743028949473280, "step": 836800 }, { "loss": 2.9, "learning_rate": 3.292804101050002e-06, "epoch": 3.366844885364745, "total_flos": 3213109632133847040, "step": 836900 }, { "loss": 2.8575, "learning_rate": 3.2919929756540712e-06, "epoch": 3.3672471849089396, "total_flos": 3213485843355432960, "step": 837000 }, { "loss": 2.835, "learning_rate": 3.2911818502581413e-06, "epoch": 3.367649484453134, "total_flos": 3213881127247902720, "step": 837100 }, { "loss": 2.8975, "learning_rate": 3.29037072486221e-06, "epoch": 3.3680517839973287, "total_flos": 3214273404977264640, "step": 837200 }, { "loss": 2.885, "learning_rate": 3.28955959946628e-06, "epoch": 3.3684540835415233, "total_flos": 3214661284998051840, "step": 837300 }, { "loss": 2.925, "learning_rate": 3.288748474070349e-06, "epoch": 3.368856383085718, "total_flos": 3215048543603496960, "step": 837400 }, { "loss": 2.825, "learning_rate": 3.287937348674419e-06, "epoch": 3.3692586826299125, "total_flos": 3215426964301854720, "step": 837500 }, { "loss": 2.87, "learning_rate": 3.2871262232784877e-06, "epoch": 3.369660982174107, "total_flos": 3215803318926981120, "step": 837600 }, { "loss": 2.9375, "learning_rate": 3.2863150978825577e-06, "epoch": 3.370063281718302, "total_flos": 3216187412032051200, "step": 837700 }, { "loss": 2.8725, "learning_rate": 3.2855039724866265e-06, "epoch": 3.3704655812624966, "total_flos": 3216590280378439680, "step": 837800 }, { "loss": 2.9075, "learning_rate": 3.2846928470906965e-06, "epoch": 3.370867880806691, "total_flos": 3216962991491389440, "step": 837900 }, { "loss": 2.8625, "learning_rate": 3.2838817216947653e-06, "epoch": 3.3712701803508858, "total_flos": 3217343600421550080, "step": 838000 }, { "loss": 2.885, "learning_rate": 3.2830705962988354e-06, "epoch": 3.3716724798950803, "total_flos": 3217730418193889280, "step": 838100 }, { "loss": 2.8925, "learning_rate": 3.282259470902904e-06, "epoch": 3.372074779439275, "total_flos": 3218101960833546240, "step": 838200 }, { "loss": 2.9, "learning_rate": 3.281448345506974e-06, "epoch": 3.3724770789834695, "total_flos": 3218474029286184960, "step": 838300 }, { "loss": 2.865, "learning_rate": 3.280637220111043e-06, "epoch": 3.372879378527664, "total_flos": 3218871623569029120, "step": 838400 }, { "loss": 2.9225, "learning_rate": 3.279826094715113e-06, "epoch": 3.3732816780718586, "total_flos": 3219266657833113600, "step": 838500 }, { "loss": 2.87, "learning_rate": 3.2790149693191818e-06, "epoch": 3.373683977616053, "total_flos": 3219659142700922880, "step": 838600 }, { "loss": 2.925, "learning_rate": 3.278203843923252e-06, "epoch": 3.374086277160248, "total_flos": 3220051802839726080, "step": 838700 }, { "loss": 2.7975, "learning_rate": 3.2773927185273206e-06, "epoch": 3.374488576704443, "total_flos": 3220426723429447680, "step": 838800 }, { "loss": 2.9075, "learning_rate": 3.2765815931313906e-06, "epoch": 3.3748908762486374, "total_flos": 3220821885163345920, "step": 838900 }, { "loss": 2.895, "learning_rate": 3.2757704677354594e-06, "epoch": 3.375293175792832, "total_flos": 3221211703787550720, "step": 839000 }, { "loss": 2.845, "learning_rate": 3.2749593423395294e-06, "epoch": 3.3756954753370265, "total_flos": 3221602308475607040, "step": 839100 }, { "loss": 2.8325, "learning_rate": 3.274148216943598e-06, "epoch": 3.376097774881221, "total_flos": 3221982928028252160, "step": 839200 }, { "loss": 2.895, "learning_rate": 3.2733370915476683e-06, "epoch": 3.3765000744254157, "total_flos": 3222355028348344320, "step": 839300 }, { "loss": 2.9475, "learning_rate": 3.272525966151737e-06, "epoch": 3.3769023739696102, "total_flos": 3222729640886016000, "step": 839400 }, { "loss": 2.8275, "learning_rate": 3.271714840755807e-06, "epoch": 3.377304673513805, "total_flos": 3223120426156308480, "step": 839500 }, { "loss": 2.845, "learning_rate": 3.270903715359876e-06, "epoch": 3.3777069730579994, "total_flos": 3223502044222494720, "step": 839600 }, { "loss": 2.915, "learning_rate": 3.270092589963946e-06, "epoch": 3.378109272602194, "total_flos": 3223887018993776640, "step": 839700 }, { "loss": 2.8925, "learning_rate": 3.2692814645680155e-06, "epoch": 3.3785115721463885, "total_flos": 3224273464979159040, "step": 839800 }, { "loss": 2.8575, "learning_rate": 3.2684703391720847e-06, "epoch": 3.3789138716905835, "total_flos": 3224660075613050880, "step": 839900 }, { "loss": 2.9, "learning_rate": 3.2676592137761543e-06, "epoch": 3.379316171234778, "total_flos": 3225027725112145920, "step": 840000 }, { "loss": 2.89, "learning_rate": 3.2668480883802235e-06, "epoch": 3.3797184707789727, "total_flos": 3225407978189076480, "step": 840100 }, { "loss": 2.8875, "learning_rate": 3.266036962984293e-06, "epoch": 3.3801207703231673, "total_flos": 3225781114201405440, "step": 840200 }, { "loss": 2.8275, "learning_rate": 3.2652258375883623e-06, "epoch": 3.380523069867362, "total_flos": 3226158260201625600, "step": 840300 }, { "loss": 2.8225, "learning_rate": 3.264414712192432e-06, "epoch": 3.3809253694115564, "total_flos": 3226553198863349760, "step": 840400 }, { "loss": 2.895, "learning_rate": 3.263603586796501e-06, "epoch": 3.381327668955751, "total_flos": 3226933759992330240, "step": 840500 }, { "loss": 2.8675, "learning_rate": 3.2627924614005708e-06, "epoch": 3.3817299684999456, "total_flos": 3227326945944115200, "step": 840600 }, { "loss": 2.86, "learning_rate": 3.26198133600464e-06, "epoch": 3.38213226804414, "total_flos": 3227689023950100480, "step": 840700 }, { "loss": 2.885, "learning_rate": 3.2611702106087096e-06, "epoch": 3.382534567588335, "total_flos": 3228070482679019520, "step": 840800 }, { "loss": 2.9025, "learning_rate": 3.2603590852127788e-06, "epoch": 3.3829368671325297, "total_flos": 3228458946936453120, "step": 840900 }, { "loss": 2.855, "learning_rate": 3.2595479598168484e-06, "epoch": 3.3833391666767243, "total_flos": 3228828959938344960, "step": 841000 }, { "loss": 2.8575, "learning_rate": 3.2587368344209176e-06, "epoch": 3.383741466220919, "total_flos": 3229196784708433920, "step": 841100 }, { "loss": 2.885, "learning_rate": 3.257925709024987e-06, "epoch": 3.3841437657651134, "total_flos": 3229587899275745280, "step": 841200 }, { "loss": 2.825, "learning_rate": 3.2571145836290564e-06, "epoch": 3.384546065309308, "total_flos": 3229969772281559040, "step": 841300 }, { "loss": 2.91, "learning_rate": 3.256303458233126e-06, "epoch": 3.3849483648535026, "total_flos": 3230362373996697600, "step": 841400 }, { "loss": 2.875, "learning_rate": 3.2554923328371952e-06, "epoch": 3.385350664397697, "total_flos": 3230744156711393280, "step": 841500 }, { "loss": 2.9025, "learning_rate": 3.254681207441265e-06, "epoch": 3.3857529639418917, "total_flos": 3231115810887137280, "step": 841600 }, { "loss": 2.8125, "learning_rate": 3.253870082045334e-06, "epoch": 3.3861552634860863, "total_flos": 3231514329326131200, "step": 841700 }, { "loss": 2.9025, "learning_rate": 3.2530589566494037e-06, "epoch": 3.386557563030281, "total_flos": 3231899914890270720, "step": 841800 }, { "loss": 2.8775, "learning_rate": 3.252247831253473e-06, "epoch": 3.386959862574476, "total_flos": 3232276110178129920, "step": 841900 }, { "loss": 2.87, "learning_rate": 3.2514367058575425e-06, "epoch": 3.3873621621186705, "total_flos": 3232659528755435520, "step": 842000 }, { "loss": 2.85, "learning_rate": 3.2506255804616117e-06, "epoch": 3.387764461662865, "total_flos": 3233036828781680640, "step": 842100 }, { "loss": 2.8475, "learning_rate": 3.2498144550656813e-06, "epoch": 3.3881667612070596, "total_flos": 3233427157285140480, "step": 842200 }, { "loss": 2.8775, "learning_rate": 3.2490033296697505e-06, "epoch": 3.388569060751254, "total_flos": 3233802848004986880, "step": 842300 }, { "loss": 2.8775, "learning_rate": 3.24819220427382e-06, "epoch": 3.3889713602954488, "total_flos": 3234174958947563520, "step": 842400 }, { "loss": 2.87, "learning_rate": 3.2473810788778893e-06, "epoch": 3.3893736598396433, "total_flos": 3234554957084866560, "step": 842500 }, { "loss": 2.94, "learning_rate": 3.246569953481959e-06, "epoch": 3.389775959383838, "total_flos": 3234941636764907520, "step": 842600 }, { "loss": 2.875, "learning_rate": 3.245758828086028e-06, "epoch": 3.3901782589280325, "total_flos": 3235311697567979520, "step": 842700 }, { "loss": 2.8475, "learning_rate": 3.2449477026900977e-06, "epoch": 3.390580558472227, "total_flos": 3235683059625400320, "step": 842800 }, { "loss": 2.855, "learning_rate": 3.244136577294167e-06, "epoch": 3.3909828580164216, "total_flos": 3236087632880547840, "step": 842900 }, { "loss": 2.9025, "learning_rate": 3.2433254518982365e-06, "epoch": 3.3913851575606166, "total_flos": 3236464045929338880, "step": 843000 }, { "loss": 2.88, "learning_rate": 3.2425143265023057e-06, "epoch": 3.391787457104811, "total_flos": 3236822623826688000, "step": 843100 }, { "loss": 2.8975, "learning_rate": 3.2417032011063754e-06, "epoch": 3.392189756649006, "total_flos": 3237216749868349440, "step": 843200 }, { "loss": 2.9025, "learning_rate": 3.2408920757104446e-06, "epoch": 3.3925920561932004, "total_flos": 3237589280399063040, "step": 843300 }, { "loss": 2.845, "learning_rate": 3.240080950314514e-06, "epoch": 3.392994355737395, "total_flos": 3237984824542402560, "step": 843400 }, { "loss": 2.9125, "learning_rate": 3.2392698249185834e-06, "epoch": 3.3933966552815895, "total_flos": 3238378573485864960, "step": 843500 }, { "loss": 2.83, "learning_rate": 3.238458699522653e-06, "epoch": 3.393798954825784, "total_flos": 3238769236597585920, "step": 843600 }, { "loss": 2.81, "learning_rate": 3.237647574126722e-06, "epoch": 3.3942012543699787, "total_flos": 3239157339690547200, "step": 843700 }, { "loss": 2.8525, "learning_rate": 3.236836448730792e-06, "epoch": 3.3946035539141732, "total_flos": 3239543902523258880, "step": 843800 }, { "loss": 2.8775, "learning_rate": 3.236025323334861e-06, "epoch": 3.3950058534583683, "total_flos": 3239946659333560320, "step": 843900 }, { "loss": 2.87, "learning_rate": 3.2352141979389306e-06, "epoch": 3.395408153002563, "total_flos": 3240332483903600640, "step": 844000 }, { "loss": 2.9025, "learning_rate": 3.234403072543e-06, "epoch": 3.3958104525467574, "total_flos": 3240717304648857600, "step": 844100 }, { "loss": 2.8675, "learning_rate": 3.2335919471470694e-06, "epoch": 3.396212752090952, "total_flos": 3241124475101460480, "step": 844200 }, { "loss": 2.865, "learning_rate": 3.2327808217511386e-06, "epoch": 3.3966150516351465, "total_flos": 3241508477915412480, "step": 844300 }, { "loss": 2.85, "learning_rate": 3.2319696963552083e-06, "epoch": 3.397017351179341, "total_flos": 3241889692327188480, "step": 844400 }, { "loss": 2.925, "learning_rate": 3.231158570959278e-06, "epoch": 3.3974196507235357, "total_flos": 3242277938823690240, "step": 844500 }, { "loss": 2.8475, "learning_rate": 3.230347445563347e-06, "epoch": 3.3978219502677303, "total_flos": 3242665935691806720, "step": 844600 }, { "loss": 2.8575, "learning_rate": 3.2295363201674167e-06, "epoch": 3.398224249811925, "total_flos": 3243051409719859200, "step": 844700 }, { "loss": 2.8925, "learning_rate": 3.228725194771486e-06, "epoch": 3.3986265493561194, "total_flos": 3243436182663936000, "step": 844800 }, { "loss": 2.8375, "learning_rate": 3.2279140693755555e-06, "epoch": 3.399028848900314, "total_flos": 3243809260252600320, "step": 844900 }, { "loss": 2.815, "learning_rate": 3.2271029439796247e-06, "epoch": 3.399431148444509, "total_flos": 3244191574091520000, "step": 845000 }, { "loss": 2.8825, "learning_rate": 3.2262918185836943e-06, "epoch": 3.3998334479887036, "total_flos": 3244569580512983040, "step": 845100 }, { "loss": 2.8275, "learning_rate": 3.2254806931877635e-06, "epoch": 3.400235747532898, "total_flos": 3244950609031280640, "step": 845200 }, { "loss": 2.8975, "learning_rate": 3.224669567791833e-06, "epoch": 3.4006380470770927, "total_flos": 3245334394084300800, "step": 845300 }, { "loss": 2.91, "learning_rate": 3.2238584423959023e-06, "epoch": 3.4010403466212873, "total_flos": 3245726246914283520, "step": 845400 }, { "loss": 2.885, "learning_rate": 3.223047316999972e-06, "epoch": 3.401442646165482, "total_flos": 3246120102082590720, "step": 845500 }, { "loss": 2.8625, "learning_rate": 3.222236191604041e-06, "epoch": 3.4018449457096764, "total_flos": 3246500211755980800, "step": 845600 }, { "loss": 2.8475, "learning_rate": 3.2214250662081108e-06, "epoch": 3.402247245253871, "total_flos": 3246877442736076800, "step": 845700 }, { "loss": 2.8975, "learning_rate": 3.22061394081218e-06, "epoch": 3.4026495447980656, "total_flos": 3247247779723745280, "step": 845800 }, { "loss": 2.9, "learning_rate": 3.2198028154162496e-06, "epoch": 3.4030518443422606, "total_flos": 3247645905130813440, "step": 845900 }, { "loss": 2.8575, "learning_rate": 3.2189916900203188e-06, "epoch": 3.4034541438864547, "total_flos": 3248020384887429120, "step": 846000 }, { "loss": 2.835, "learning_rate": 3.2181805646243884e-06, "epoch": 3.4038564434306497, "total_flos": 3248405205632686080, "step": 846100 }, { "loss": 2.905, "learning_rate": 3.2173694392284576e-06, "epoch": 3.4042587429748443, "total_flos": 3248804334864537600, "step": 846200 }, { "loss": 2.8825, "learning_rate": 3.216558313832527e-06, "epoch": 3.404661042519039, "total_flos": 3249169695218227200, "step": 846300 }, { "loss": 2.9, "learning_rate": 3.2157471884365964e-06, "epoch": 3.4050633420632335, "total_flos": 3249546671258695680, "step": 846400 }, { "loss": 2.86, "learning_rate": 3.214936063040666e-06, "epoch": 3.405465641607428, "total_flos": 3249939225172654080, "step": 846500 }, { "loss": 2.8475, "learning_rate": 3.2141249376447352e-06, "epoch": 3.4058679411516226, "total_flos": 3250316881052129280, "step": 846600 }, { "loss": 2.8375, "learning_rate": 3.213313812248805e-06, "epoch": 3.406270240695817, "total_flos": 3250673387565004800, "step": 846700 }, { "loss": 2.8975, "learning_rate": 3.212502686852874e-06, "epoch": 3.4066725402400118, "total_flos": 3251061804021258240, "step": 846800 }, { "loss": 2.865, "learning_rate": 3.2116915614569437e-06, "epoch": 3.4070748397842063, "total_flos": 3251445344757135360, "step": 846900 }, { "loss": 2.8575, "learning_rate": 3.210880436061013e-06, "epoch": 3.4074771393284013, "total_flos": 3251814481404057600, "step": 847000 }, { "loss": 2.8775, "learning_rate": 3.2100693106650825e-06, "epoch": 3.407879438872596, "total_flos": 3252196683706890240, "step": 847100 }, { "loss": 2.8675, "learning_rate": 3.2092581852691517e-06, "epoch": 3.4082817384167905, "total_flos": 3252584266298112000, "step": 847200 }, { "loss": 2.905, "learning_rate": 3.2084470598732213e-06, "epoch": 3.408684037960985, "total_flos": 3252968019483678720, "step": 847300 }, { "loss": 2.8625, "learning_rate": 3.2076359344772905e-06, "epoch": 3.4090863375051796, "total_flos": 3253347417450608640, "step": 847400 }, { "loss": 2.87, "learning_rate": 3.20682480908136e-06, "epoch": 3.409488637049374, "total_flos": 3253738755090094080, "step": 847500 }, { "loss": 2.905, "learning_rate": 3.2060136836854293e-06, "epoch": 3.409890936593569, "total_flos": 3254112257578137600, "step": 847600 }, { "loss": 2.8525, "learning_rate": 3.205202558289499e-06, "epoch": 3.4102932361377634, "total_flos": 3254502416121845760, "step": 847700 }, { "loss": 2.7975, "learning_rate": 3.204391432893568e-06, "epoch": 3.410695535681958, "total_flos": 3254874426150819840, "step": 847800 }, { "loss": 2.865, "learning_rate": 3.2035803074976377e-06, "epoch": 3.4110978352261525, "total_flos": 3255244099233208320, "step": 847900 }, { "loss": 2.8525, "learning_rate": 3.202769182101707e-06, "epoch": 3.411500134770347, "total_flos": 3255633721341450240, "step": 848000 }, { "loss": 2.875, "learning_rate": 3.2019580567057765e-06, "epoch": 3.411902434314542, "total_flos": 3256015578413537280, "step": 848100 }, { "loss": 2.8825, "learning_rate": 3.2011469313098457e-06, "epoch": 3.4123047338587367, "total_flos": 3256402502410721280, "step": 848200 }, { "loss": 2.81, "learning_rate": 3.2003358059139154e-06, "epoch": 3.4127070334029312, "total_flos": 3256772324207892480, "step": 848300 }, { "loss": 2.825, "learning_rate": 3.1995246805179846e-06, "epoch": 3.413109332947126, "total_flos": 3257151313209169920, "step": 848400 }, { "loss": 2.8725, "learning_rate": 3.1987135551220546e-06, "epoch": 3.4135116324913204, "total_flos": 3257546177513502720, "step": 848500 }, { "loss": 2.9, "learning_rate": 3.1979024297261234e-06, "epoch": 3.413913932035515, "total_flos": 3257930706140436480, "step": 848600 }, { "loss": 2.865, "learning_rate": 3.1970913043301934e-06, "epoch": 3.4143162315797095, "total_flos": 3258287653486417920, "step": 848700 }, { "loss": 2.8, "learning_rate": 3.196280178934262e-06, "epoch": 3.414718531123904, "total_flos": 3258669356532480000, "step": 848800 }, { "loss": 2.925, "learning_rate": 3.1954690535383322e-06, "epoch": 3.4151208306680987, "total_flos": 3259055542266992640, "step": 848900 }, { "loss": 2.865, "learning_rate": 3.194657928142401e-06, "epoch": 3.4155231302122937, "total_flos": 3259434674671810560, "step": 849000 }, { "loss": 2.8425, "learning_rate": 3.193846802746471e-06, "epoch": 3.4159254297564883, "total_flos": 3259810992118241280, "step": 849100 }, { "loss": 2.9, "learning_rate": 3.1930356773505407e-06, "epoch": 3.416327729300683, "total_flos": 3260188207164610560, "step": 849200 }, { "loss": 2.905, "learning_rate": 3.19222455195461e-06, "epoch": 3.4167300288448774, "total_flos": 3260573213803345920, "step": 849300 }, { "loss": 2.855, "learning_rate": 3.1914134265586795e-06, "epoch": 3.417132328389072, "total_flos": 3260957471556925440, "step": 849400 }, { "loss": 2.9, "learning_rate": 3.1906023011627487e-06, "epoch": 3.4175346279332666, "total_flos": 3261333868671989760, "step": 849500 }, { "loss": 2.8825, "learning_rate": 3.1897911757668183e-06, "epoch": 3.417936927477461, "total_flos": 3261705262596864000, "step": 849600 }, { "loss": 2.875, "learning_rate": 3.1889800503708875e-06, "epoch": 3.4183392270216557, "total_flos": 3262083731096401920, "step": 849700 }, { "loss": 2.895, "learning_rate": 3.188168924974957e-06, "epoch": 3.4187415265658503, "total_flos": 3262478951253964800, "step": 849800 }, { "loss": 2.9025, "learning_rate": 3.1873577995790263e-06, "epoch": 3.419143826110045, "total_flos": 3262851327758653440, "step": 849900 }, { "loss": 2.8975, "learning_rate": 3.186546674183096e-06, "epoch": 3.4195461256542394, "total_flos": 3263234523263784960, "step": 850000 }, { "loss": 2.8525, "learning_rate": 3.185735548787165e-06, "epoch": 3.4199484251984344, "total_flos": 3263625154508052480, "step": 850100 }, { "loss": 2.8725, "learning_rate": 3.1849244233912347e-06, "epoch": 3.420350724742629, "total_flos": 3263986255245465600, "step": 850200 }, { "loss": 2.9075, "learning_rate": 3.184113297995304e-06, "epoch": 3.4207530242868236, "total_flos": 3264363151617300480, "step": 850300 }, { "loss": 2.8675, "learning_rate": 3.1833021725993736e-06, "epoch": 3.421155323831018, "total_flos": 3264735496254535680, "step": 850400 }, { "loss": 2.86, "learning_rate": 3.1824910472034428e-06, "epoch": 3.4215576233752127, "total_flos": 3265120232019916800, "step": 850500 }, { "loss": 2.9, "learning_rate": 3.1816799218075124e-06, "epoch": 3.4219599229194073, "total_flos": 3265526913838233600, "step": 850600 }, { "loss": 2.86, "learning_rate": 3.1808687964115816e-06, "epoch": 3.422362222463602, "total_flos": 3265927588641576960, "step": 850700 }, { "loss": 2.87, "learning_rate": 3.180057671015651e-06, "epoch": 3.4227645220077965, "total_flos": 3266294648592783360, "step": 850800 }, { "loss": 2.8725, "learning_rate": 3.1792465456197204e-06, "epoch": 3.423166821551991, "total_flos": 3266675533707540480, "step": 850900 }, { "loss": 2.8525, "learning_rate": 3.17843542022379e-06, "epoch": 3.4235691210961856, "total_flos": 3267042455566448640, "step": 851000 }, { "loss": 2.8725, "learning_rate": 3.177624294827859e-06, "epoch": 3.42397142064038, "total_flos": 3267434308396431360, "step": 851100 }, { "loss": 2.8625, "learning_rate": 3.176813169431929e-06, "epoch": 3.424373720184575, "total_flos": 3267819400015042560, "step": 851200 }, { "loss": 2.8475, "learning_rate": 3.176002044035998e-06, "epoch": 3.4247760197287698, "total_flos": 3268220074818385920, "step": 851300 }, { "loss": 2.825, "learning_rate": 3.1751909186400676e-06, "epoch": 3.4251783192729643, "total_flos": 3268599950797117440, "step": 851400 }, { "loss": 2.9125, "learning_rate": 3.174379793244137e-06, "epoch": 3.425580618817159, "total_flos": 3269001284194498560, "step": 851500 }, { "loss": 2.92, "learning_rate": 3.1735686678482064e-06, "epoch": 3.4259829183613535, "total_flos": 3269381685986211840, "step": 851600 }, { "loss": 2.8725, "learning_rate": 3.1727575424522756e-06, "epoch": 3.426385217905548, "total_flos": 3269762379896248320, "step": 851700 }, { "loss": 2.85, "learning_rate": 3.1719464170563453e-06, "epoch": 3.4267875174497426, "total_flos": 3270139010705971200, "step": 851800 }, { "loss": 2.925, "learning_rate": 3.1711352916604145e-06, "epoch": 3.427189816993937, "total_flos": 3270547986980935680, "step": 851900 }, { "loss": 2.8725, "learning_rate": 3.170324166264484e-06, "epoch": 3.427592116538132, "total_flos": 3270948714896701440, "step": 852000 }, { "loss": 2.8075, "learning_rate": 3.1695130408685533e-06, "epoch": 3.427994416082327, "total_flos": 3271349527792343040, "step": 852100 }, { "loss": 2.915, "learning_rate": 3.168701915472623e-06, "epoch": 3.4283967156265214, "total_flos": 3271737572461639680, "step": 852200 }, { "loss": 2.885, "learning_rate": 3.167890790076692e-06, "epoch": 3.428799015170716, "total_flos": 3272120019081615360, "step": 852300 }, { "loss": 2.8375, "learning_rate": 3.1670796646807617e-06, "epoch": 3.4292013147149105, "total_flos": 3272505785227991040, "step": 852400 }, { "loss": 2.8625, "learning_rate": 3.166268539284831e-06, "epoch": 3.429603614259105, "total_flos": 3272896304936171520, "step": 852500 }, { "loss": 2.8725, "learning_rate": 3.1654574138889005e-06, "epoch": 3.4300059138032997, "total_flos": 3273280504266086400, "step": 852600 }, { "loss": 2.83, "learning_rate": 3.1646462884929697e-06, "epoch": 3.4304082133474942, "total_flos": 3273660831700408320, "step": 852700 }, { "loss": 2.8975, "learning_rate": 3.1638351630970393e-06, "epoch": 3.430810512891689, "total_flos": 3274060019355924480, "step": 852800 }, { "loss": 2.8975, "learning_rate": 3.1630240377011085e-06, "epoch": 3.4312128124358834, "total_flos": 3274439847533475840, "step": 852900 }, { "loss": 2.8675, "learning_rate": 3.162212912305178e-06, "epoch": 3.431615111980078, "total_flos": 3274828003738859520, "step": 853000 }, { "loss": 2.855, "learning_rate": 3.1614017869092474e-06, "epoch": 3.4320174115242725, "total_flos": 3275196088759818240, "step": 853100 }, { "loss": 2.8625, "learning_rate": 3.160590661513317e-06, "epoch": 3.4324197110684675, "total_flos": 3275590267913902080, "step": 853200 }, { "loss": 2.845, "learning_rate": 3.159779536117386e-06, "epoch": 3.432822010612662, "total_flos": 3275978424119285760, "step": 853300 }, { "loss": 2.8875, "learning_rate": 3.1589684107214558e-06, "epoch": 3.4332243101568567, "total_flos": 3276367074270197760, "step": 853400 }, { "loss": 2.9, "learning_rate": 3.158157285325525e-06, "epoch": 3.4336266097010513, "total_flos": 3276756478617507840, "step": 853500 }, { "loss": 2.8525, "learning_rate": 3.1573461599295946e-06, "epoch": 3.434028909245246, "total_flos": 3277144055897487360, "step": 853600 }, { "loss": 2.86, "learning_rate": 3.156535034533664e-06, "epoch": 3.4344312087894404, "total_flos": 3277540784447846400, "step": 853700 }, { "loss": 2.8475, "learning_rate": 3.1557239091377334e-06, "epoch": 3.434833508333635, "total_flos": 3277917463058749440, "step": 853800 }, { "loss": 2.855, "learning_rate": 3.154912783741803e-06, "epoch": 3.4352358078778296, "total_flos": 3278298098545121280, "step": 853900 }, { "loss": 2.8775, "learning_rate": 3.1541016583458722e-06, "epoch": 3.435638107422024, "total_flos": 3278680215868078080, "step": 854000 }, { "loss": 2.8675, "learning_rate": 3.153290532949942e-06, "epoch": 3.436040406966219, "total_flos": 3279069423699425280, "step": 854100 }, { "loss": 2.8775, "learning_rate": 3.152479407554011e-06, "epoch": 3.4364427065104133, "total_flos": 3279449214698280960, "step": 854200 }, { "loss": 2.87, "learning_rate": 3.1516682821580807e-06, "epoch": 3.4368450060546083, "total_flos": 3279821224727255040, "step": 854300 }, { "loss": 2.8075, "learning_rate": 3.15085715676215e-06, "epoch": 3.437247305598803, "total_flos": 3280200203106048000, "step": 854400 }, { "loss": 2.8575, "learning_rate": 3.1500460313662195e-06, "epoch": 3.4376496051429974, "total_flos": 3280571708567009280, "step": 854500 }, { "loss": 2.8625, "learning_rate": 3.1492349059702887e-06, "epoch": 3.438051904687192, "total_flos": 3280944297521387520, "step": 854600 }, { "loss": 2.82, "learning_rate": 3.1484237805743583e-06, "epoch": 3.4384542042313866, "total_flos": 3281322930669434880, "step": 854700 }, { "loss": 2.8575, "learning_rate": 3.1476126551784275e-06, "epoch": 3.438856503775581, "total_flos": 3281709498813388800, "step": 854800 }, { "loss": 2.86, "learning_rate": 3.146801529782497e-06, "epoch": 3.4392588033197757, "total_flos": 3282089326990940160, "step": 854900 }, { "loss": 2.8675, "learning_rate": 3.1459904043865663e-06, "epoch": 3.4396611028639703, "total_flos": 3282473760015513600, "step": 855000 }, { "loss": 2.875, "learning_rate": 3.145179278990636e-06, "epoch": 3.440063402408165, "total_flos": 3282866016499906560, "step": 855100 }, { "loss": 2.86, "learning_rate": 3.144368153594705e-06, "epoch": 3.44046570195236, "total_flos": 3283249849354106880, "step": 855200 }, { "loss": 2.8525, "learning_rate": 3.1435570281987747e-06, "epoch": 3.4408680014965545, "total_flos": 3283640252214958080, "step": 855300 }, { "loss": 2.835, "learning_rate": 3.142745902802844e-06, "epoch": 3.441270301040749, "total_flos": 3284029863700715520, "step": 855400 }, { "loss": 2.8325, "learning_rate": 3.1419347774069136e-06, "epoch": 3.4416726005849436, "total_flos": 3284410260181186560, "step": 855500 }, { "loss": 2.8375, "learning_rate": 3.1411236520109828e-06, "epoch": 3.442074900129138, "total_flos": 3284806271713843200, "step": 855600 }, { "loss": 2.81, "learning_rate": 3.1403125266150524e-06, "epoch": 3.4424771996733328, "total_flos": 3285192473382082560, "step": 855700 }, { "loss": 2.905, "learning_rate": 3.1395014012191216e-06, "epoch": 3.4428794992175273, "total_flos": 3285558476396083200, "step": 855800 }, { "loss": 2.8875, "learning_rate": 3.138690275823191e-06, "epoch": 3.443281798761722, "total_flos": 3285953122939484160, "step": 855900 }, { "loss": 2.8875, "learning_rate": 3.1378791504272604e-06, "epoch": 3.4436840983059165, "total_flos": 3286325589735290880, "step": 856000 }, { "loss": 2.85, "learning_rate": 3.13706802503133e-06, "epoch": 3.444086397850111, "total_flos": 3286710883181107200, "step": 856100 }, { "loss": 2.865, "learning_rate": 3.136256899635399e-06, "epoch": 3.4444886973943056, "total_flos": 3287100505289349120, "step": 856200 }, { "loss": 2.9125, "learning_rate": 3.135445774239469e-06, "epoch": 3.4448909969385006, "total_flos": 3287468335370680320, "step": 856300 }, { "loss": 2.8625, "learning_rate": 3.134634648843538e-06, "epoch": 3.4452932964826952, "total_flos": 3287864904583772160, "step": 856400 }, { "loss": 2.845, "learning_rate": 3.1338235234476076e-06, "epoch": 3.44569559602689, "total_flos": 3288246485471262720, "step": 856500 }, { "loss": 2.9075, "learning_rate": 3.133012398051677e-06, "epoch": 3.4460978955710844, "total_flos": 3288641934012241920, "step": 856600 }, { "loss": 2.835, "learning_rate": 3.1322012726557464e-06, "epoch": 3.446500195115279, "total_flos": 3289016222564136960, "step": 856700 }, { "loss": 2.905, "learning_rate": 3.1313901472598156e-06, "epoch": 3.4469024946594735, "total_flos": 3289399572095293440, "step": 856800 }, { "loss": 2.8325, "learning_rate": 3.1305790218638853e-06, "epoch": 3.447304794203668, "total_flos": 3289766589556561920, "step": 856900 }, { "loss": 2.87, "learning_rate": 3.1297678964679545e-06, "epoch": 3.4477070937478627, "total_flos": 3290143847092869120, "step": 857000 }, { "loss": 2.91, "learning_rate": 3.128956771072024e-06, "epoch": 3.4481093932920572, "total_flos": 3290522549287065600, "step": 857100 }, { "loss": 2.89, "learning_rate": 3.1281456456760933e-06, "epoch": 3.4485116928362523, "total_flos": 3290901272726231040, "step": 857200 }, { "loss": 2.84, "learning_rate": 3.127334520280163e-06, "epoch": 3.448913992380447, "total_flos": 3291272841922099200, "step": 857300 }, { "loss": 2.83, "learning_rate": 3.126523394884232e-06, "epoch": 3.4493162919246414, "total_flos": 3291648633555548160, "step": 857400 }, { "loss": 2.885, "learning_rate": 3.1257122694883017e-06, "epoch": 3.449718591468836, "total_flos": 3292038106949007360, "step": 857500 }, { "loss": 2.865, "learning_rate": 3.124901144092371e-06, "epoch": 3.4501208910130305, "total_flos": 3292426592451409920, "step": 857600 }, { "loss": 2.8825, "learning_rate": 3.1240900186964405e-06, "epoch": 3.450523190557225, "total_flos": 3292807164202874880, "step": 857700 }, { "loss": 2.815, "learning_rate": 3.1232788933005097e-06, "epoch": 3.4509254901014197, "total_flos": 3293191267930429440, "step": 857800 }, { "loss": 2.8425, "learning_rate": 3.1224677679045793e-06, "epoch": 3.4513277896456143, "total_flos": 3293583221674014720, "step": 857900 }, { "loss": 2.8475, "learning_rate": 3.1216566425086485e-06, "epoch": 3.451730089189809, "total_flos": 3293966958925854720, "step": 858000 }, { "loss": 2.87, "learning_rate": 3.120845517112718e-06, "epoch": 3.4521323887340034, "total_flos": 3294356134889748480, "step": 858100 }, { "loss": 2.8275, "learning_rate": 3.1200343917167874e-06, "epoch": 3.452534688278198, "total_flos": 3294729095631083520, "step": 858200 }, { "loss": 2.87, "learning_rate": 3.119223266320857e-06, "epoch": 3.452936987822393, "total_flos": 3295117283703920640, "step": 858300 }, { "loss": 2.8825, "learning_rate": 3.118412140924926e-06, "epoch": 3.4533392873665876, "total_flos": 3295491322627430400, "step": 858400 }, { "loss": 2.8775, "learning_rate": 3.1176010155289958e-06, "epoch": 3.453741586910782, "total_flos": 3295880190539274240, "step": 858500 }, { "loss": 2.8725, "learning_rate": 3.116789890133065e-06, "epoch": 3.4541438864549767, "total_flos": 3296270646512547840, "step": 858600 }, { "loss": 2.83, "learning_rate": 3.1159787647371346e-06, "epoch": 3.4545461859991713, "total_flos": 3296654022599915520, "step": 858700 }, { "loss": 2.915, "learning_rate": 3.1151676393412046e-06, "epoch": 3.454948485543366, "total_flos": 3297035539752499200, "step": 858800 }, { "loss": 2.92, "learning_rate": 3.1143565139452734e-06, "epoch": 3.4553507850875604, "total_flos": 3297413912649676800, "step": 858900 }, { "loss": 2.8775, "learning_rate": 3.1135453885493435e-06, "epoch": 3.455753084631755, "total_flos": 3297812447022397440, "step": 859000 }, { "loss": 2.835, "learning_rate": 3.1127342631534122e-06, "epoch": 3.4561553841759496, "total_flos": 3298190984568084480, "step": 859100 }, { "loss": 2.8325, "learning_rate": 3.1119231377574823e-06, "epoch": 3.456557683720144, "total_flos": 3298573261228308480, "step": 859200 }, { "loss": 2.8825, "learning_rate": 3.111112012361551e-06, "epoch": 3.4569599832643387, "total_flos": 3298958060728596480, "step": 859300 }, { "loss": 2.795, "learning_rate": 3.110300886965621e-06, "epoch": 3.4573622828085337, "total_flos": 3299332264300615680, "step": 859400 }, { "loss": 2.88, "learning_rate": 3.10948976156969e-06, "epoch": 3.4577645823527283, "total_flos": 3299708496767170560, "step": 859500 }, { "loss": 2.9175, "learning_rate": 3.10867863617376e-06, "epoch": 3.458166881896923, "total_flos": 3300069990536509440, "step": 859600 }, { "loss": 2.915, "learning_rate": 3.1078675107778287e-06, "epoch": 3.4585691814411175, "total_flos": 3300455395518412800, "step": 859700 }, { "loss": 2.905, "learning_rate": 3.1070563853818987e-06, "epoch": 3.458971480985312, "total_flos": 3300840519004477440, "step": 859800 }, { "loss": 2.8325, "learning_rate": 3.106245259985968e-06, "epoch": 3.4593737805295066, "total_flos": 3301215158098360320, "step": 859900 }, { "loss": 2.83, "learning_rate": 3.1054341345900375e-06, "epoch": 3.459776080073701, "total_flos": 3301605146682316800, "step": 860000 }, { "loss": 2.845, "learning_rate": 3.1046230091941067e-06, "epoch": 3.4601783796178958, "total_flos": 3301976774301849600, "step": 860100 }, { "loss": 2.91, "learning_rate": 3.1038118837981764e-06, "epoch": 3.4605806791620903, "total_flos": 3302374910331402240, "step": 860200 }, { "loss": 2.85, "learning_rate": 3.1030007584022455e-06, "epoch": 3.4609829787062854, "total_flos": 3302758201438894080, "step": 860300 }, { "loss": 2.88, "learning_rate": 3.102189633006315e-06, "epoch": 3.46138527825048, "total_flos": 3303136515912407040, "step": 860400 }, { "loss": 2.86, "learning_rate": 3.1013785076103844e-06, "epoch": 3.4617875777946745, "total_flos": 3303514787895982080, "step": 860500 }, { "loss": 2.8675, "learning_rate": 3.100567382214454e-06, "epoch": 3.462189877338869, "total_flos": 3303903650496583680, "step": 860600 }, { "loss": 2.86, "learning_rate": 3.099756256818523e-06, "epoch": 3.4625921768830636, "total_flos": 3304305892116387840, "step": 860700 }, { "loss": 2.825, "learning_rate": 3.098945131422593e-06, "epoch": 3.462994476427258, "total_flos": 3304703109301032960, "step": 860800 }, { "loss": 2.86, "learning_rate": 3.098134006026662e-06, "epoch": 3.463396775971453, "total_flos": 3305073796830689280, "step": 860900 }, { "loss": 2.84, "learning_rate": 3.0973228806307316e-06, "epoch": 3.4637990755156474, "total_flos": 3305458675999610880, "step": 861000 }, { "loss": 2.91, "learning_rate": 3.096511755234801e-06, "epoch": 3.464201375059842, "total_flos": 3305850980285184000, "step": 861100 }, { "loss": 2.8925, "learning_rate": 3.0957006298388704e-06, "epoch": 3.4646036746040365, "total_flos": 3306233017939507200, "step": 861200 }, { "loss": 2.88, "learning_rate": 3.0948895044429396e-06, "epoch": 3.465005974148231, "total_flos": 3306621036052592640, "step": 861300 }, { "loss": 2.9275, "learning_rate": 3.0940783790470092e-06, "epoch": 3.465408273692426, "total_flos": 3307012596764252160, "step": 861400 }, { "loss": 2.9375, "learning_rate": 3.0932672536510784e-06, "epoch": 3.4658105732366207, "total_flos": 3307402059535226880, "step": 861500 }, { "loss": 2.8475, "learning_rate": 3.092456128255148e-06, "epoch": 3.4662128727808152, "total_flos": 3307784654869985280, "step": 861600 }, { "loss": 2.87, "learning_rate": 3.0916450028592173e-06, "epoch": 3.46661517232501, "total_flos": 3308166947463936000, "step": 861700 }, { "loss": 2.8375, "learning_rate": 3.090833877463287e-06, "epoch": 3.4670174718692044, "total_flos": 3308556904180439040, "step": 861800 }, { "loss": 2.87, "learning_rate": 3.090022752067356e-06, "epoch": 3.467419771413399, "total_flos": 3308945320636692480, "step": 861900 }, { "loss": 2.8475, "learning_rate": 3.0892116266714257e-06, "epoch": 3.4678220709575935, "total_flos": 3309332377414932480, "step": 862000 }, { "loss": 2.9025, "learning_rate": 3.088400501275495e-06, "epoch": 3.468224370501788, "total_flos": 3309718111693854720, "step": 862100 }, { "loss": 2.89, "learning_rate": 3.0875893758795645e-06, "epoch": 3.4686266700459827, "total_flos": 3310081528132884480, "step": 862200 }, { "loss": 2.84, "learning_rate": 3.0867782504836337e-06, "epoch": 3.4690289695901777, "total_flos": 3310466040826091520, "step": 862300 }, { "loss": 2.8275, "learning_rate": 3.0859671250877033e-06, "epoch": 3.469431269134372, "total_flos": 3310841269467863040, "step": 862400 }, { "loss": 2.9225, "learning_rate": 3.0851559996917725e-06, "epoch": 3.469833568678567, "total_flos": 3311227922591692800, "step": 862500 }, { "loss": 2.925, "learning_rate": 3.084344874295842e-06, "epoch": 3.4702358682227614, "total_flos": 3311608515588126720, "step": 862600 }, { "loss": 2.8075, "learning_rate": 3.0835337488999113e-06, "epoch": 3.470638167766956, "total_flos": 3312005100734945280, "step": 862700 }, { "loss": 2.8675, "learning_rate": 3.082722623503981e-06, "epoch": 3.4710404673111506, "total_flos": 3312384010067589120, "step": 862800 }, { "loss": 2.8525, "learning_rate": 3.08191149810805e-06, "epoch": 3.471442766855345, "total_flos": 3312760848015759360, "step": 862900 }, { "loss": 2.8175, "learning_rate": 3.0811003727121198e-06, "epoch": 3.4718450663995397, "total_flos": 3313127217505474560, "step": 863000 }, { "loss": 2.835, "learning_rate": 3.080289247316189e-06, "epoch": 3.4722473659437343, "total_flos": 3313500518166312960, "step": 863100 }, { "loss": 2.8775, "learning_rate": 3.0794781219202586e-06, "epoch": 3.472649665487929, "total_flos": 3313877303002060800, "step": 863200 }, { "loss": 2.875, "learning_rate": 3.0786669965243278e-06, "epoch": 3.4730519650321234, "total_flos": 3314263154128312320, "step": 863300 }, { "loss": 2.9475, "learning_rate": 3.0778558711283974e-06, "epoch": 3.4734542645763185, "total_flos": 3314639126343997440, "step": 863400 }, { "loss": 2.835, "learning_rate": 3.077044745732467e-06, "epoch": 3.473856564120513, "total_flos": 3315019830876518400, "step": 863500 }, { "loss": 2.875, "learning_rate": 3.076233620336536e-06, "epoch": 3.4742588636647076, "total_flos": 3315403021070407680, "step": 863600 }, { "loss": 2.845, "learning_rate": 3.075422494940606e-06, "epoch": 3.474661163208902, "total_flos": 3315787602809763840, "step": 863700 }, { "loss": 2.8525, "learning_rate": 3.074611369544675e-06, "epoch": 3.4750634627530967, "total_flos": 3316186323075962880, "step": 863800 }, { "loss": 2.845, "learning_rate": 3.0738002441487446e-06, "epoch": 3.4754657622972913, "total_flos": 3316568966211901440, "step": 863900 }, { "loss": 2.8625, "learning_rate": 3.072989118752814e-06, "epoch": 3.475868061841486, "total_flos": 3316969805663754240, "step": 864000 }, { "loss": 2.8825, "learning_rate": 3.0721779933568835e-06, "epoch": 3.4762703613856805, "total_flos": 3317353712875345920, "step": 864100 }, { "loss": 2.9025, "learning_rate": 3.0713668679609527e-06, "epoch": 3.476672660929875, "total_flos": 3317738613289236480, "step": 864200 }, { "loss": 2.86, "learning_rate": 3.0705557425650223e-06, "epoch": 3.4770749604740696, "total_flos": 3318110410868520960, "step": 864300 }, { "loss": 2.86, "learning_rate": 3.0697446171690915e-06, "epoch": 3.477477260018264, "total_flos": 3318483233517557760, "step": 864400 }, { "loss": 2.87, "learning_rate": 3.068933491773161e-06, "epoch": 3.477879559562459, "total_flos": 3318875925523814400, "step": 864500 }, { "loss": 2.865, "learning_rate": 3.0681223663772303e-06, "epoch": 3.4782818591066538, "total_flos": 3319247643434465280, "step": 864600 }, { "loss": 2.8675, "learning_rate": 3.0673112409813e-06, "epoch": 3.4786841586508483, "total_flos": 3319638465883453440, "step": 864700 }, { "loss": 2.875, "learning_rate": 3.066500115585369e-06, "epoch": 3.479086458195043, "total_flos": 3320020450425354240, "step": 864800 }, { "loss": 2.9025, "learning_rate": 3.0656889901894387e-06, "epoch": 3.4794887577392375, "total_flos": 3320402041935329280, "step": 864900 }, { "loss": 2.8175, "learning_rate": 3.064877864793508e-06, "epoch": 3.479891057283432, "total_flos": 3320789024356177920, "step": 865000 }, { "loss": 2.89, "learning_rate": 3.0640667393975775e-06, "epoch": 3.4802933568276266, "total_flos": 3321191329710888960, "step": 865100 }, { "loss": 2.88, "learning_rate": 3.0632556140016467e-06, "epoch": 3.480695656371821, "total_flos": 3321569219285022720, "step": 865200 }, { "loss": 2.8675, "learning_rate": 3.0624444886057164e-06, "epoch": 3.481097955916016, "total_flos": 3321943794643998720, "step": 865300 }, { "loss": 2.865, "learning_rate": 3.0616333632097855e-06, "epoch": 3.481500255460211, "total_flos": 3322324716937451520, "step": 865400 }, { "loss": 2.87, "learning_rate": 3.060822237813855e-06, "epoch": 3.481902555004405, "total_flos": 3322711338193827840, "step": 865500 }, { "loss": 2.8275, "learning_rate": 3.0600111124179244e-06, "epoch": 3.4823048545486, "total_flos": 3323090614002186240, "step": 865600 }, { "loss": 2.865, "learning_rate": 3.059199987021994e-06, "epoch": 3.4827071540927945, "total_flos": 3323465215917373440, "step": 865700 }, { "loss": 2.845, "learning_rate": 3.058388861626063e-06, "epoch": 3.483109453636989, "total_flos": 3323870500878981120, "step": 865800 }, { "loss": 2.9, "learning_rate": 3.057577736230133e-06, "epoch": 3.4835117531811837, "total_flos": 3324262884833187840, "step": 865900 }, { "loss": 2.8475, "learning_rate": 3.056766610834202e-06, "epoch": 3.4839140527253782, "total_flos": 3324635903998187520, "step": 866000 }, { "loss": 2.86, "learning_rate": 3.0559554854382716e-06, "epoch": 3.484316352269573, "total_flos": 3325003160465356800, "step": 866100 }, { "loss": 2.87, "learning_rate": 3.055144360042341e-06, "epoch": 3.4847186518137674, "total_flos": 3325385415880611840, "step": 866200 }, { "loss": 2.8825, "learning_rate": 3.0543332346464104e-06, "epoch": 3.485120951357962, "total_flos": 3325761048176793600, "step": 866300 }, { "loss": 2.8525, "learning_rate": 3.0535221092504796e-06, "epoch": 3.4855232509021565, "total_flos": 3326139320160368640, "step": 866400 }, { "loss": 2.9325, "learning_rate": 3.0527109838545492e-06, "epoch": 3.4859255504463516, "total_flos": 3326508701124433920, "step": 866500 }, { "loss": 2.8625, "learning_rate": 3.0518998584586184e-06, "epoch": 3.486327849990546, "total_flos": 3326897962068203520, "step": 866600 }, { "loss": 2.8925, "learning_rate": 3.051088733062688e-06, "epoch": 3.4867301495347407, "total_flos": 3327273190709975040, "step": 866700 }, { "loss": 2.8725, "learning_rate": 3.0502776076667573e-06, "epoch": 3.4871324490789353, "total_flos": 3327669372202383360, "step": 866800 }, { "loss": 2.905, "learning_rate": 3.049466482270827e-06, "epoch": 3.48753474862313, "total_flos": 3328042030202910720, "step": 866900 }, { "loss": 2.885, "learning_rate": 3.048655356874896e-06, "epoch": 3.4879370481673244, "total_flos": 3328430924670965760, "step": 867000 }, { "loss": 2.83, "learning_rate": 3.0478442314789657e-06, "epoch": 3.488339347711519, "total_flos": 3328809903049758720, "step": 867100 }, { "loss": 2.8675, "learning_rate": 3.047033106083035e-06, "epoch": 3.4887416472557136, "total_flos": 3329188902673520640, "step": 867200 }, { "loss": 2.92, "learning_rate": 3.0462219806871045e-06, "epoch": 3.489143946799908, "total_flos": 3329576809250519040, "step": 867300 }, { "loss": 2.865, "learning_rate": 3.0454108552911737e-06, "epoch": 3.4895462463441027, "total_flos": 3329964917654722560, "step": 867400 }, { "loss": 2.8975, "learning_rate": 3.0445997298952433e-06, "epoch": 3.4899485458882973, "total_flos": 3330335052815185920, "step": 867500 }, { "loss": 2.8575, "learning_rate": 3.0437886044993125e-06, "epoch": 3.4903508454324923, "total_flos": 3330711630512486400, "step": 867600 }, { "loss": 2.8425, "learning_rate": 3.042977479103382e-06, "epoch": 3.490753144976687, "total_flos": 3331113256028190720, "step": 867700 }, { "loss": 2.855, "learning_rate": 3.0421663537074513e-06, "epoch": 3.4911554445208814, "total_flos": 3331488941436794880, "step": 867800 }, { "loss": 2.845, "learning_rate": 3.041355228311521e-06, "epoch": 3.491557744065076, "total_flos": 3331865354485585920, "step": 867900 }, { "loss": 2.8475, "learning_rate": 3.04054410291559e-06, "epoch": 3.4919600436092706, "total_flos": 3332243244059719680, "step": 868000 }, { "loss": 2.86, "learning_rate": 3.0397329775196598e-06, "epoch": 3.492362343153465, "total_flos": 3332621834717829120, "step": 868100 }, { "loss": 2.855, "learning_rate": 3.0389218521237294e-06, "epoch": 3.4927646426976597, "total_flos": 3333012126042593280, "step": 868200 }, { "loss": 2.8825, "learning_rate": 3.0381107267277986e-06, "epoch": 3.4931669422418543, "total_flos": 3333400287559219200, "step": 868300 }, { "loss": 2.92, "learning_rate": 3.037299601331868e-06, "epoch": 3.493569241786049, "total_flos": 3333783642401617920, "step": 868400 }, { "loss": 2.8775, "learning_rate": 3.0364884759359374e-06, "epoch": 3.493971541330244, "total_flos": 3334176653082408960, "step": 868500 }, { "loss": 2.8575, "learning_rate": 3.0356773505400074e-06, "epoch": 3.4943738408744385, "total_flos": 3334567231214254080, "step": 868600 }, { "loss": 2.8525, "learning_rate": 3.034866225144076e-06, "epoch": 3.494776140418633, "total_flos": 3334950347050752000, "step": 868700 }, { "loss": 2.8675, "learning_rate": 3.0340550997481463e-06, "epoch": 3.4951784399628276, "total_flos": 3335327455872276480, "step": 868800 }, { "loss": 2.925, "learning_rate": 3.033243974352215e-06, "epoch": 3.495580739507022, "total_flos": 3335697187378329600, "step": 868900 }, { "loss": 2.815, "learning_rate": 3.032432848956285e-06, "epoch": 3.4959830390512168, "total_flos": 3336100358465525760, "step": 869000 }, { "loss": 2.835, "learning_rate": 3.031621723560354e-06, "epoch": 3.4963853385954113, "total_flos": 3336486321127864320, "step": 869100 }, { "loss": 2.88, "learning_rate": 3.030810598164424e-06, "epoch": 3.496787638139606, "total_flos": 3336874333929707520, "step": 869200 }, { "loss": 2.8725, "learning_rate": 3.0299994727684927e-06, "epoch": 3.4971899376838005, "total_flos": 3337260222234654720, "step": 869300 }, { "loss": 2.8925, "learning_rate": 3.0291883473725627e-06, "epoch": 3.497592237227995, "total_flos": 3337639158123509760, "step": 869400 }, { "loss": 2.855, "learning_rate": 3.0283772219766315e-06, "epoch": 3.4979945367721896, "total_flos": 3338024435635599360, "step": 869500 }, { "loss": 2.8525, "learning_rate": 3.0275660965807015e-06, "epoch": 3.4983968363163847, "total_flos": 3338406574203525120, "step": 869600 }, { "loss": 2.895, "learning_rate": 3.0267549711847703e-06, "epoch": 3.4987991358605792, "total_flos": 3338800323146987520, "step": 869700 }, { "loss": 2.825, "learning_rate": 3.0259438457888403e-06, "epoch": 3.499201435404774, "total_flos": 3339193710925977600, "step": 869800 }, { "loss": 2.845, "learning_rate": 3.025132720392909e-06, "epoch": 3.4996037349489684, "total_flos": 3339576168168437760, "step": 869900 }, { "loss": 2.855, "learning_rate": 3.024321594996979e-06, "epoch": 3.500006034493163, "total_flos": 3339969508146247680, "step": 870000 }, { "loss": 2.885, "learning_rate": 3.023510469601048e-06, "epoch": 3.5004083340373575, "total_flos": 3340342458265098240, "step": 870100 }, { "loss": 2.85, "learning_rate": 3.022699344205118e-06, "epoch": 3.500810633581552, "total_flos": 3340732032572160000, "step": 870200 }, { "loss": 2.8275, "learning_rate": 3.0218882188091867e-06, "epoch": 3.5012129331257467, "total_flos": 3341124644909783040, "step": 870300 }, { "loss": 2.9075, "learning_rate": 3.0210770934132568e-06, "epoch": 3.5016152326699412, "total_flos": 3341511483927091200, "step": 870400 }, { "loss": 2.885, "learning_rate": 3.0202659680173255e-06, "epoch": 3.5020175322141363, "total_flos": 3341908345258506240, "step": 870500 }, { "loss": 2.8375, "learning_rate": 3.0194548426213956e-06, "epoch": 3.5024198317583304, "total_flos": 3342292077199104000, "step": 870600 }, { "loss": 2.895, "learning_rate": 3.0186437172254644e-06, "epoch": 3.5028221313025254, "total_flos": 3342687876282071040, "step": 870700 }, { "loss": 2.88, "learning_rate": 3.0178325918295344e-06, "epoch": 3.50322443084672, "total_flos": 3343079421060003840, "step": 870800 }, { "loss": 2.8625, "learning_rate": 3.017021466433603e-06, "epoch": 3.5036267303909145, "total_flos": 3343446151714191360, "step": 870900 }, { "loss": 2.885, "learning_rate": 3.0162103410376732e-06, "epoch": 3.504029029935109, "total_flos": 3343837962054236160, "step": 871000 }, { "loss": 2.8275, "learning_rate": 3.0153992156417424e-06, "epoch": 3.5044313294793037, "total_flos": 3344220148423342080, "step": 871100 }, { "loss": 2.8975, "learning_rate": 3.014588090245812e-06, "epoch": 3.5048336290234983, "total_flos": 3344612261504194560, "step": 871200 }, { "loss": 2.855, "learning_rate": 3.0137769648498812e-06, "epoch": 3.505235928567693, "total_flos": 3345000173392435200, "step": 871300 }, { "loss": 2.8175, "learning_rate": 3.012965839453951e-06, "epoch": 3.5056382281118874, "total_flos": 3345385918293841920, "step": 871400 }, { "loss": 2.855, "learning_rate": 3.01215471405802e-06, "epoch": 3.506040527656082, "total_flos": 3345778482830284800, "step": 871500 }, { "loss": 2.8175, "learning_rate": 3.0113435886620897e-06, "epoch": 3.506442827200277, "total_flos": 3346175901842135040, "step": 871600 }, { "loss": 2.8925, "learning_rate": 3.010532463266159e-06, "epoch": 3.506845126744471, "total_flos": 3346559713451366400, "step": 871700 }, { "loss": 2.8875, "learning_rate": 3.0097213378702285e-06, "epoch": 3.507247426288666, "total_flos": 3346922444740147200, "step": 871800 }, { "loss": 2.85, "learning_rate": 3.0089102124742977e-06, "epoch": 3.5076497258328607, "total_flos": 3347307430133913600, "step": 871900 }, { "loss": 2.865, "learning_rate": 3.0080990870783673e-06, "epoch": 3.5080520253770553, "total_flos": 3347678011438725120, "step": 872000 }, { "loss": 2.8475, "learning_rate": 3.0072879616824365e-06, "epoch": 3.50845432492125, "total_flos": 3348066714702059520, "step": 872100 }, { "loss": 2.835, "learning_rate": 3.006476836286506e-06, "epoch": 3.5088566244654444, "total_flos": 3348428643993262080, "step": 872200 }, { "loss": 2.84, "learning_rate": 3.0056657108905753e-06, "epoch": 3.509258924009639, "total_flos": 3348812726475847680, "step": 872300 }, { "loss": 2.8775, "learning_rate": 3.004854585494645e-06, "epoch": 3.5096612235538336, "total_flos": 3349192905195386880, "step": 872400 }, { "loss": 2.9075, "learning_rate": 3.004043460098714e-06, "epoch": 3.5100635230980286, "total_flos": 3349577646272010240, "step": 872500 }, { "loss": 2.87, "learning_rate": 3.0032323347027837e-06, "epoch": 3.5104658226422227, "total_flos": 3349960347831613440, "step": 872600 }, { "loss": 2.86, "learning_rate": 3.002421209306853e-06, "epoch": 3.5108681221864178, "total_flos": 3350335507427235840, "step": 872700 }, { "loss": 2.85, "learning_rate": 3.0016100839109226e-06, "epoch": 3.5112704217306123, "total_flos": 3350727339012249600, "step": 872800 }, { "loss": 2.8375, "learning_rate": 3.000798958514992e-06, "epoch": 3.511672721274807, "total_flos": 3351120700235028480, "step": 872900 }, { "loss": 2.8625, "learning_rate": 2.9999878331190614e-06, "epoch": 3.5120750208190015, "total_flos": 3351500958623201280, "step": 873000 }, { "loss": 2.86, "learning_rate": 2.999176707723131e-06, "epoch": 3.512477320363196, "total_flos": 3351886655723427840, "step": 873100 }, { "loss": 2.8275, "learning_rate": 2.9983655823272e-06, "epoch": 3.5128796199073906, "total_flos": 3352251877984819200, "step": 873200 }, { "loss": 2.82, "learning_rate": 2.99755445693127e-06, "epoch": 3.513281919451585, "total_flos": 3352623733987768320, "step": 873300 }, { "loss": 2.82, "learning_rate": 2.996743331535339e-06, "epoch": 3.5136842189957798, "total_flos": 3353007864271534080, "step": 873400 }, { "loss": 2.81, "learning_rate": 2.9959322061394086e-06, "epoch": 3.5140865185399743, "total_flos": 3353395558398842880, "step": 873500 }, { "loss": 2.8475, "learning_rate": 2.995121080743478e-06, "epoch": 3.5144888180841694, "total_flos": 3353787124421744640, "step": 873600 }, { "loss": 2.8775, "learning_rate": 2.9943099553475474e-06, "epoch": 3.5148911176283635, "total_flos": 3354162050322708480, "step": 873700 }, { "loss": 2.875, "learning_rate": 2.9934988299516166e-06, "epoch": 3.5152934171725585, "total_flos": 3354555140672133120, "step": 873800 }, { "loss": 2.835, "learning_rate": 2.9926877045556863e-06, "epoch": 3.515695716716753, "total_flos": 3354940057019750400, "step": 873900 }, { "loss": 2.8825, "learning_rate": 2.9918765791597554e-06, "epoch": 3.5160980162609476, "total_flos": 3355335802990295040, "step": 874000 }, { "loss": 2.855, "learning_rate": 2.991065453763825e-06, "epoch": 3.516500315805142, "total_flos": 3355711642424924160, "step": 874100 }, { "loss": 2.8475, "learning_rate": 2.9902543283678943e-06, "epoch": 3.516902615349337, "total_flos": 3356110792901744640, "step": 874200 }, { "loss": 2.8375, "learning_rate": 2.989443202971964e-06, "epoch": 3.5173049148935314, "total_flos": 3356496829921474560, "step": 874300 }, { "loss": 2.81, "learning_rate": 2.988632077576033e-06, "epoch": 3.517707214437726, "total_flos": 3356870401455667200, "step": 874400 }, { "loss": 2.8175, "learning_rate": 2.9878209521801027e-06, "epoch": 3.5181095139819205, "total_flos": 3357255030996203520, "step": 874500 }, { "loss": 2.8175, "learning_rate": 2.987009826784172e-06, "epoch": 3.518511813526115, "total_flos": 3357641657563822080, "step": 874600 }, { "loss": 2.8775, "learning_rate": 2.9861987013882415e-06, "epoch": 3.51891411307031, "total_flos": 3358033643174860800, "step": 874700 }, { "loss": 2.9, "learning_rate": 2.9853875759923107e-06, "epoch": 3.5193164126145042, "total_flos": 3358400092333209600, "step": 874800 }, { "loss": 2.855, "learning_rate": 2.9845764505963803e-06, "epoch": 3.5197187121586992, "total_flos": 3358780579104798720, "step": 874900 }, { "loss": 2.885, "learning_rate": 2.9837653252004495e-06, "epoch": 3.520121011702894, "total_flos": 3359171141302917120, "step": 875000 }, { "loss": 2.8475, "learning_rate": 2.982954199804519e-06, "epoch": 3.5205233112470884, "total_flos": 3359550666739660800, "step": 875100 }, { "loss": 2.9075, "learning_rate": 2.9821430744085883e-06, "epoch": 3.520925610791283, "total_flos": 3359931838661498880, "step": 875200 }, { "loss": 2.85, "learning_rate": 2.981331949012658e-06, "epoch": 3.5213279103354775, "total_flos": 3360316186706196480, "step": 875300 }, { "loss": 2.8775, "learning_rate": 2.980520823616727e-06, "epoch": 3.521730209879672, "total_flos": 3360700221387601920, "step": 875400 }, { "loss": 2.8475, "learning_rate": 2.9797096982207968e-06, "epoch": 3.5221325094238667, "total_flos": 3361073506114713600, "step": 875500 }, { "loss": 2.8275, "learning_rate": 2.978898572824866e-06, "epoch": 3.5225348089680617, "total_flos": 3361443731566295040, "step": 875600 }, { "loss": 2.8475, "learning_rate": 2.9780874474289356e-06, "epoch": 3.522937108512256, "total_flos": 3361838319686031360, "step": 875700 }, { "loss": 2.835, "learning_rate": 2.9772763220330048e-06, "epoch": 3.523339408056451, "total_flos": 3362235265997322240, "step": 875800 }, { "loss": 2.825, "learning_rate": 2.9764651966370744e-06, "epoch": 3.5237417076006454, "total_flos": 3362609230563440640, "step": 875900 }, { "loss": 2.9025, "learning_rate": 2.9756540712411436e-06, "epoch": 3.52414400714484, "total_flos": 3362992792544286720, "step": 876000 }, { "loss": 2.8275, "learning_rate": 2.9748429458452132e-06, "epoch": 3.5245463066890346, "total_flos": 3363370076636805120, "step": 876100 }, { "loss": 2.8575, "learning_rate": 2.9740318204492824e-06, "epoch": 3.524948606233229, "total_flos": 3363749671119697920, "step": 876200 }, { "loss": 2.8675, "learning_rate": 2.973220695053352e-06, "epoch": 3.5253509057774237, "total_flos": 3364144307040614400, "step": 876300 }, { "loss": 2.845, "learning_rate": 2.9724095696574212e-06, "epoch": 3.5257532053216183, "total_flos": 3364549278638929920, "step": 876400 }, { "loss": 2.855, "learning_rate": 2.971598444261491e-06, "epoch": 3.526155504865813, "total_flos": 3364949937508546560, "step": 876500 }, { "loss": 2.83, "learning_rate": 2.97078731886556e-06, "epoch": 3.5265578044100074, "total_flos": 3365321570439321600, "step": 876600 }, { "loss": 2.8475, "learning_rate": 2.9699761934696297e-06, "epoch": 3.5269601039542025, "total_flos": 3365720609380055040, "step": 876700 }, { "loss": 2.85, "learning_rate": 2.969165068073699e-06, "epoch": 3.5273624034983966, "total_flos": 3366111830172211200, "step": 876800 }, { "loss": 2.8175, "learning_rate": 2.9683539426777685e-06, "epoch": 3.5277647030425916, "total_flos": 3366488455670691840, "step": 876900 }, { "loss": 2.88, "learning_rate": 2.9675428172818377e-06, "epoch": 3.528167002586786, "total_flos": 3366886448296704000, "step": 877000 }, { "loss": 2.8475, "learning_rate": 2.9667316918859073e-06, "epoch": 3.5285693021309807, "total_flos": 3367269845629040640, "step": 877100 }, { "loss": 2.845, "learning_rate": 2.9659205664899765e-06, "epoch": 3.5289716016751753, "total_flos": 3367643305627146240, "step": 877200 }, { "loss": 2.865, "learning_rate": 2.965109441094046e-06, "epoch": 3.52937390121937, "total_flos": 3368035418707998720, "step": 877300 }, { "loss": 2.915, "learning_rate": 2.9642983156981153e-06, "epoch": 3.5297762007635645, "total_flos": 3368411991094056960, "step": 877400 }, { "loss": 2.8825, "learning_rate": 2.963487190302185e-06, "epoch": 3.530178500307759, "total_flos": 3368809574754416640, "step": 877500 }, { "loss": 2.88, "learning_rate": 2.9626760649062545e-06, "epoch": 3.5305807998519536, "total_flos": 3369196674022594560, "step": 877600 }, { "loss": 2.8325, "learning_rate": 2.9618649395103237e-06, "epoch": 3.530983099396148, "total_flos": 3369572763085608960, "step": 877700 }, { "loss": 2.895, "learning_rate": 2.9610538141143934e-06, "epoch": 3.531385398940343, "total_flos": 3369946547069491200, "step": 877800 }, { "loss": 2.87, "learning_rate": 2.9602426887184626e-06, "epoch": 3.5317876984845378, "total_flos": 3370322912317102080, "step": 877900 }, { "loss": 2.8725, "learning_rate": 2.959431563322532e-06, "epoch": 3.5321899980287323, "total_flos": 3370693637025454080, "step": 878000 }, { "loss": 2.8975, "learning_rate": 2.9586204379266014e-06, "epoch": 3.532592297572927, "total_flos": 3371078723332823040, "step": 878100 }, { "loss": 2.8775, "learning_rate": 2.957809312530671e-06, "epoch": 3.5329945971171215, "total_flos": 3371472238581626880, "step": 878200 }, { "loss": 2.8725, "learning_rate": 2.95699818713474e-06, "epoch": 3.533396896661316, "total_flos": 3371851976468060160, "step": 878300 }, { "loss": 2.9075, "learning_rate": 2.95618706173881e-06, "epoch": 3.5337991962055106, "total_flos": 3372224427330140160, "step": 878400 }, { "loss": 2.845, "learning_rate": 2.955375936342879e-06, "epoch": 3.534201495749705, "total_flos": 3372597998864332800, "step": 878500 }, { "loss": 2.8125, "learning_rate": 2.9545648109469486e-06, "epoch": 3.5346037952939, "total_flos": 3372968357096970240, "step": 878600 }, { "loss": 2.86, "learning_rate": 2.953753685551018e-06, "epoch": 3.535006094838095, "total_flos": 3373356784175708160, "step": 878700 }, { "loss": 2.845, "learning_rate": 2.9529425601550874e-06, "epoch": 3.535408394382289, "total_flos": 3373740903836989440, "step": 878800 }, { "loss": 2.85, "learning_rate": 2.9521314347591566e-06, "epoch": 3.535810693926484, "total_flos": 3374110752190371840, "step": 878900 }, { "loss": 2.8375, "learning_rate": 2.9513203093632263e-06, "epoch": 3.5362129934706785, "total_flos": 3374480775814748160, "step": 879000 }, { "loss": 2.845, "learning_rate": 2.9505091839672954e-06, "epoch": 3.536615293014873, "total_flos": 3374880696421693440, "step": 879100 }, { "loss": 2.85, "learning_rate": 2.949698058571365e-06, "epoch": 3.5370175925590677, "total_flos": 3375281663343360000, "step": 879200 }, { "loss": 2.895, "learning_rate": 2.9488869331754343e-06, "epoch": 3.5374198921032622, "total_flos": 3375658968680847360, "step": 879300 }, { "loss": 2.9275, "learning_rate": 2.948075807779504e-06, "epoch": 3.537822191647457, "total_flos": 3376042132318525440, "step": 879400 }, { "loss": 2.855, "learning_rate": 2.947264682383573e-06, "epoch": 3.5382244911916514, "total_flos": 3376436268982671360, "step": 879500 }, { "loss": 2.8825, "learning_rate": 2.9464535569876427e-06, "epoch": 3.538626790735846, "total_flos": 3376827866873026560, "step": 879600 }, { "loss": 2.895, "learning_rate": 2.945642431591712e-06, "epoch": 3.5390290902800405, "total_flos": 3377201300314920960, "step": 879700 }, { "loss": 2.875, "learning_rate": 2.9448313061957815e-06, "epoch": 3.5394313898242356, "total_flos": 3377578361335265280, "step": 879800 }, { "loss": 2.875, "learning_rate": 2.9440201807998507e-06, "epoch": 3.5398336893684297, "total_flos": 3377964679850833920, "step": 879900 }, { "loss": 2.8875, "learning_rate": 2.9432090554039208e-06, "epoch": 3.5402359889126247, "total_flos": 3378358333191936000, "step": 880000 }, { "loss": 2.81, "learning_rate": 2.9423979300079895e-06, "epoch": 3.5406382884568193, "total_flos": 3378753957003909120, "step": 880100 }, { "loss": 2.8325, "learning_rate": 2.9415868046120596e-06, "epoch": 3.541040588001014, "total_flos": 3379136695742208000, "step": 880200 }, { "loss": 2.87, "learning_rate": 2.9407756792161283e-06, "epoch": 3.5414428875452084, "total_flos": 3379524065883740160, "step": 880300 }, { "loss": 2.835, "learning_rate": 2.9399645538201984e-06, "epoch": 3.541845187089403, "total_flos": 3379893664608737280, "step": 880400 }, { "loss": 2.8675, "learning_rate": 2.939153428424267e-06, "epoch": 3.5422474866335976, "total_flos": 3380283138002196480, "step": 880500 }, { "loss": 2.8575, "learning_rate": 2.938342303028337e-06, "epoch": 3.542649786177792, "total_flos": 3380664936650618880, "step": 880600 }, { "loss": 2.775, "learning_rate": 2.937531177632406e-06, "epoch": 3.543052085721987, "total_flos": 3381043808804567040, "step": 880700 }, { "loss": 2.8375, "learning_rate": 2.936720052236476e-06, "epoch": 3.5434543852661813, "total_flos": 3381433760209827840, "step": 880800 }, { "loss": 2.85, "learning_rate": 2.9359089268405448e-06, "epoch": 3.5438566848103763, "total_flos": 3381814199180236800, "step": 880900 }, { "loss": 2.8125, "learning_rate": 2.935097801444615e-06, "epoch": 3.544258984354571, "total_flos": 3382201362183321600, "step": 881000 }, { "loss": 2.835, "learning_rate": 2.9342866760486836e-06, "epoch": 3.5446612838987654, "total_flos": 3382586868078827520, "step": 881100 }, { "loss": 2.855, "learning_rate": 2.9334755506527536e-06, "epoch": 3.54506358344296, "total_flos": 3382966064218552320, "step": 881200 }, { "loss": 2.9, "learning_rate": 2.9326644252568224e-06, "epoch": 3.5454658829871546, "total_flos": 3383332736449075200, "step": 881300 }, { "loss": 2.8925, "learning_rate": 2.9318532998608925e-06, "epoch": 3.545868182531349, "total_flos": 3383724663636449280, "step": 881400 }, { "loss": 2.8675, "learning_rate": 2.9310421744649612e-06, "epoch": 3.5462704820755437, "total_flos": 3384117222861649920, "step": 881500 }, { "loss": 2.84, "learning_rate": 2.9302310490690313e-06, "epoch": 3.5466727816197383, "total_flos": 3384497629964605440, "step": 881600 }, { "loss": 2.805, "learning_rate": 2.9294199236731e-06, "epoch": 3.547075081163933, "total_flos": 3384865401622272000, "step": 881700 }, { "loss": 2.8525, "learning_rate": 2.92860879827717e-06, "epoch": 3.547477380708128, "total_flos": 3385256043489024000, "step": 881800 }, { "loss": 2.8525, "learning_rate": 2.927797672881239e-06, "epoch": 3.547879680252322, "total_flos": 3385632339690485760, "step": 881900 }, { "loss": 2.88, "learning_rate": 2.926986547485309e-06, "epoch": 3.548281979796517, "total_flos": 3386005927158405120, "step": 882000 }, { "loss": 2.8725, "learning_rate": 2.9261754220893777e-06, "epoch": 3.5486842793407116, "total_flos": 3386378850721044480, "step": 882100 }, { "loss": 2.86, "learning_rate": 2.9253642966934477e-06, "epoch": 3.549086578884906, "total_flos": 3386768366604441600, "step": 882200 }, { "loss": 2.78, "learning_rate": 2.9245531712975165e-06, "epoch": 3.5494888784291008, "total_flos": 3387149602261186560, "step": 882300 }, { "loss": 2.8575, "learning_rate": 2.9237420459015865e-06, "epoch": 3.5498911779732953, "total_flos": 3387535644592158720, "step": 882400 }, { "loss": 2.84, "learning_rate": 2.922930920505656e-06, "epoch": 3.55029347751749, "total_flos": 3387913417318963200, "step": 882500 }, { "loss": 2.85, "learning_rate": 2.9221197951097254e-06, "epoch": 3.5506957770616845, "total_flos": 3388289787877816320, "step": 882600 }, { "loss": 2.815, "learning_rate": 2.921308669713795e-06, "epoch": 3.551098076605879, "total_flos": 3388663407213189120, "step": 882700 }, { "loss": 2.83, "learning_rate": 2.920497544317864e-06, "epoch": 3.5515003761500736, "total_flos": 3389043495641610240, "step": 882800 }, { "loss": 2.8225, "learning_rate": 2.9196864189219338e-06, "epoch": 3.5519026756942687, "total_flos": 3389430600221030400, "step": 882900 }, { "loss": 2.85, "learning_rate": 2.918875293526003e-06, "epoch": 3.552304975238463, "total_flos": 3389842752618854400, "step": 883000 }, { "loss": 2.87, "learning_rate": 2.9180641681300726e-06, "epoch": 3.552707274782658, "total_flos": 3390217476692613120, "step": 883100 }, { "loss": 2.84, "learning_rate": 2.917253042734142e-06, "epoch": 3.5531095743268524, "total_flos": 3390603460599920640, "step": 883200 }, { "loss": 2.85, "learning_rate": 2.9164419173382114e-06, "epoch": 3.553511873871047, "total_flos": 3390977568569579520, "step": 883300 }, { "loss": 2.84, "learning_rate": 2.9156307919422806e-06, "epoch": 3.5539141734152415, "total_flos": 3391357157741230080, "step": 883400 }, { "loss": 2.8725, "learning_rate": 2.9148196665463502e-06, "epoch": 3.554316472959436, "total_flos": 3391748304175994880, "step": 883500 }, { "loss": 2.835, "learning_rate": 2.9140085411504194e-06, "epoch": 3.5547187725036307, "total_flos": 3392109080927631360, "step": 883600 }, { "loss": 2.8775, "learning_rate": 2.913197415754489e-06, "epoch": 3.5551210720478252, "total_flos": 3392486731495864320, "step": 883700 }, { "loss": 2.825, "learning_rate": 2.9123862903585582e-06, "epoch": 3.5555233715920203, "total_flos": 3392849112242657280, "step": 883800 }, { "loss": 2.8675, "learning_rate": 2.911575164962628e-06, "epoch": 3.5559256711362144, "total_flos": 3393229912377538560, "step": 883900 }, { "loss": 2.8875, "learning_rate": 2.910764039566697e-06, "epoch": 3.5563279706804094, "total_flos": 3393605953639372800, "step": 884000 }, { "loss": 2.8075, "learning_rate": 2.9099529141707667e-06, "epoch": 3.556730270224604, "total_flos": 3394005555571783680, "step": 884100 }, { "loss": 2.8375, "learning_rate": 2.909141788774836e-06, "epoch": 3.5571325697687985, "total_flos": 3394383216762501120, "step": 884200 }, { "loss": 2.8675, "learning_rate": 2.9083306633789055e-06, "epoch": 3.557534869312993, "total_flos": 3394768786392913920, "step": 884300 }, { "loss": 2.895, "learning_rate": 2.9075195379829747e-06, "epoch": 3.5579371688571877, "total_flos": 3395151487952517120, "step": 884400 }, { "loss": 2.8325, "learning_rate": 2.9067084125870443e-06, "epoch": 3.5583394684013823, "total_flos": 3395526164225095680, "step": 884500 }, { "loss": 2.7925, "learning_rate": 2.9058972871911135e-06, "epoch": 3.558741767945577, "total_flos": 3395891195281766400, "step": 884600 }, { "loss": 2.8875, "learning_rate": 2.905086161795183e-06, "epoch": 3.5591440674897714, "total_flos": 3396279569248081920, "step": 884700 }, { "loss": 2.8125, "learning_rate": 2.9042750363992523e-06, "epoch": 3.559546367033966, "total_flos": 3396668293756385280, "step": 884800 }, { "loss": 2.845, "learning_rate": 2.903463911003322e-06, "epoch": 3.559948666578161, "total_flos": 3397052891429468160, "step": 884900 }, { "loss": 2.8325, "learning_rate": 2.902652785607391e-06, "epoch": 3.560350966122355, "total_flos": 3397423265595832320, "step": 885000 }, { "loss": 2.8725, "learning_rate": 2.9018416602114608e-06, "epoch": 3.56075326566655, "total_flos": 3397805839685621760, "step": 885100 }, { "loss": 2.8975, "learning_rate": 2.90103053481553e-06, "epoch": 3.5611555652107447, "total_flos": 3398167992048998400, "step": 885200 }, { "loss": 2.8025, "learning_rate": 2.9002194094195996e-06, "epoch": 3.5615578647549393, "total_flos": 3398559940481341440, "step": 885300 }, { "loss": 2.825, "learning_rate": 2.8994082840236688e-06, "epoch": 3.561960164299134, "total_flos": 3398941160204359680, "step": 885400 }, { "loss": 2.895, "learning_rate": 2.8985971586277384e-06, "epoch": 3.5623624638433284, "total_flos": 3399338016224532480, "step": 885500 }, { "loss": 2.8625, "learning_rate": 2.8977860332318076e-06, "epoch": 3.562764763387523, "total_flos": 3399729938100664320, "step": 885600 }, { "loss": 2.8675, "learning_rate": 2.896974907835877e-06, "epoch": 3.5631670629317176, "total_flos": 3400118875058657280, "step": 885700 }, { "loss": 2.84, "learning_rate": 2.8961637824399464e-06, "epoch": 3.563569362475912, "total_flos": 3400505060793169920, "step": 885800 }, { "loss": 2.8625, "learning_rate": 2.895352657044016e-06, "epoch": 3.5639716620201067, "total_flos": 3400900684605143040, "step": 885900 }, { "loss": 2.83, "learning_rate": 2.894541531648085e-06, "epoch": 3.5643739615643018, "total_flos": 3401280560583874560, "step": 886000 }, { "loss": 2.835, "learning_rate": 2.893730406252155e-06, "epoch": 3.5647762611084963, "total_flos": 3401661918399191040, "step": 886100 }, { "loss": 2.805, "learning_rate": 2.892919280856224e-06, "epoch": 3.565178560652691, "total_flos": 3402049734685071360, "step": 886200 }, { "loss": 2.85, "learning_rate": 2.8921081554602936e-06, "epoch": 3.5655808601968855, "total_flos": 3402442044281886720, "step": 886300 }, { "loss": 2.8675, "learning_rate": 2.891297030064363e-06, "epoch": 3.56598315974108, "total_flos": 3402836765182679040, "step": 886400 }, { "loss": 2.8575, "learning_rate": 2.8904859046684325e-06, "epoch": 3.5663854592852746, "total_flos": 3403208520272025600, "step": 886500 }, { "loss": 2.8875, "learning_rate": 2.8896747792725017e-06, "epoch": 3.566787758829469, "total_flos": 3403580126646589440, "step": 886600 }, { "loss": 2.9, "learning_rate": 2.8888636538765713e-06, "epoch": 3.5671900583736638, "total_flos": 3403964618094827520, "step": 886700 }, { "loss": 2.875, "learning_rate": 2.8880525284806405e-06, "epoch": 3.5675923579178583, "total_flos": 3404352344089589760, "step": 886800 }, { "loss": 2.875, "learning_rate": 2.88724140308471e-06, "epoch": 3.5679946574620534, "total_flos": 3404726701687633920, "step": 886900 }, { "loss": 2.88, "learning_rate": 2.8864302776887793e-06, "epoch": 3.5683969570062475, "total_flos": 3405107799252080640, "step": 887000 }, { "loss": 2.835, "learning_rate": 2.885619152292849e-06, "epoch": 3.5687992565504425, "total_flos": 3405483877692610560, "step": 887100 }, { "loss": 2.8475, "learning_rate": 2.8848080268969185e-06, "epoch": 3.569201556094637, "total_flos": 3405866196842772480, "step": 887200 }, { "loss": 2.85, "learning_rate": 2.8839969015009877e-06, "epoch": 3.5696038556388316, "total_flos": 3406252802165422080, "step": 887300 }, { "loss": 2.875, "learning_rate": 2.8831857761050573e-06, "epoch": 3.570006155183026, "total_flos": 3406631679630612480, "step": 887400 }, { "loss": 2.91, "learning_rate": 2.8823746507091265e-06, "epoch": 3.570408454727221, "total_flos": 3407010615519467520, "step": 887500 }, { "loss": 2.925, "learning_rate": 2.881563525313196e-06, "epoch": 3.5708107542714154, "total_flos": 3407401634484418560, "step": 887600 }, { "loss": 2.875, "learning_rate": 2.8807523999172654e-06, "epoch": 3.57121305381561, "total_flos": 3407775089171281920, "step": 887700 }, { "loss": 2.87, "learning_rate": 2.879941274521335e-06, "epoch": 3.5716153533598045, "total_flos": 3408151523465041920, "step": 887800 }, { "loss": 2.86, "learning_rate": 2.879130149125404e-06, "epoch": 3.572017652903999, "total_flos": 3408534155978496000, "step": 887900 }, { "loss": 2.8075, "learning_rate": 2.8783190237294738e-06, "epoch": 3.572419952448194, "total_flos": 3408926412462888960, "step": 888000 }, { "loss": 2.8575, "learning_rate": 2.877507898333543e-06, "epoch": 3.5728222519923882, "total_flos": 3409301906666772480, "step": 888100 }, { "loss": 2.875, "learning_rate": 2.8766967729376126e-06, "epoch": 3.5732245515365832, "total_flos": 3409664892895180800, "step": 888200 }, { "loss": 2.8525, "learning_rate": 2.875885647541682e-06, "epoch": 3.573626851080778, "total_flos": 3410055109862553600, "step": 888300 }, { "loss": 2.84, "learning_rate": 2.8750745221457514e-06, "epoch": 3.5740291506249724, "total_flos": 3410429005382522880, "step": 888400 }, { "loss": 2.825, "learning_rate": 2.8742633967498206e-06, "epoch": 3.574431450169167, "total_flos": 3410803671032616960, "step": 888500 }, { "loss": 2.8625, "learning_rate": 2.8734522713538902e-06, "epoch": 3.5748337497133615, "total_flos": 3411180147816314880, "step": 888600 }, { "loss": 2.865, "learning_rate": 2.8726411459579594e-06, "epoch": 3.575236049257556, "total_flos": 3411566243259709440, "step": 888700 }, { "loss": 2.8275, "learning_rate": 2.871830020562029e-06, "epoch": 3.5756383488017507, "total_flos": 3411969244387153920, "step": 888800 }, { "loss": 2.8375, "learning_rate": 2.8710188951660982e-06, "epoch": 3.5760406483459453, "total_flos": 3412361474315335680, "step": 888900 }, { "loss": 2.8, "learning_rate": 2.870207769770168e-06, "epoch": 3.57644294789014, "total_flos": 3412743018024130560, "step": 889000 }, { "loss": 2.8825, "learning_rate": 2.869396644374237e-06, "epoch": 3.576845247434335, "total_flos": 3413134536245852160, "step": 889100 }, { "loss": 2.855, "learning_rate": 2.8685855189783067e-06, "epoch": 3.5772475469785294, "total_flos": 3413511209545512960, "step": 889200 }, { "loss": 2.795, "learning_rate": 2.867774393582376e-06, "epoch": 3.577649846522724, "total_flos": 3413895600080148480, "step": 889300 }, { "loss": 2.895, "learning_rate": 2.8669632681864455e-06, "epoch": 3.5780521460669186, "total_flos": 3414283570392053760, "step": 889400 }, { "loss": 2.865, "learning_rate": 2.8661521427905147e-06, "epoch": 3.578454445611113, "total_flos": 3414655585732270080, "step": 889500 }, { "loss": 2.8175, "learning_rate": 2.8653410173945843e-06, "epoch": 3.5788567451553077, "total_flos": 3415032907003484160, "step": 889600 }, { "loss": 2.85, "learning_rate": 2.8645298919986535e-06, "epoch": 3.5792590446995023, "total_flos": 3415403679513016320, "step": 889700 }, { "loss": 2.8575, "learning_rate": 2.863718766602723e-06, "epoch": 3.579661344243697, "total_flos": 3415786370450135040, "step": 889800 }, { "loss": 2.8975, "learning_rate": 2.8629076412067923e-06, "epoch": 3.5800636437878914, "total_flos": 3416158428280289280, "step": 889900 }, { "loss": 2.8325, "learning_rate": 2.862096515810862e-06, "epoch": 3.5804659433320865, "total_flos": 3416529184856094720, "step": 890000 }, { "loss": 2.825, "learning_rate": 2.861285390414931e-06, "epoch": 3.5808682428762806, "total_flos": 3416923236540364800, "step": 890100 }, { "loss": 2.7875, "learning_rate": 2.8604742650190008e-06, "epoch": 3.5812705424204756, "total_flos": 3417309167335249920, "step": 890200 }, { "loss": 2.865, "learning_rate": 2.85966313962307e-06, "epoch": 3.58167284196467, "total_flos": 3417681156119255040, "step": 890300 }, { "loss": 2.8725, "learning_rate": 2.8588520142271396e-06, "epoch": 3.5820751415088647, "total_flos": 3418080253483653120, "step": 890400 }, { "loss": 2.835, "learning_rate": 2.8580408888312088e-06, "epoch": 3.5824774410530593, "total_flos": 3418452847749273600, "step": 890500 }, { "loss": 2.8625, "learning_rate": 2.8572297634352784e-06, "epoch": 3.582879740597254, "total_flos": 3418850585435658240, "step": 890600 }, { "loss": 2.9075, "learning_rate": 2.8564186380393476e-06, "epoch": 3.5832820401414485, "total_flos": 3419251982567946240, "step": 890700 }, { "loss": 2.835, "learning_rate": 2.855607512643417e-06, "epoch": 3.583684339685643, "total_flos": 3419642831573145600, "step": 890800 }, { "loss": 2.82, "learning_rate": 2.8547963872474864e-06, "epoch": 3.5840866392298376, "total_flos": 3420024157521008640, "step": 890900 }, { "loss": 2.8425, "learning_rate": 2.853985261851556e-06, "epoch": 3.584488938774032, "total_flos": 3420413078545274880, "step": 891000 }, { "loss": 2.8525, "learning_rate": 2.853174136455625e-06, "epoch": 3.584891238318227, "total_flos": 3420796104090654720, "step": 891100 }, { "loss": 2.8175, "learning_rate": 2.8523630110596953e-06, "epoch": 3.5852935378624213, "total_flos": 3421186512262748160, "step": 891200 }, { "loss": 2.825, "learning_rate": 2.851551885663764e-06, "epoch": 3.5856958374066163, "total_flos": 3421563498925701120, "step": 891300 }, { "loss": 2.8425, "learning_rate": 2.850740760267834e-06, "epoch": 3.586098136950811, "total_flos": 3421958012688046080, "step": 891400 }, { "loss": 2.8625, "learning_rate": 2.849929634871903e-06, "epoch": 3.5865004364950055, "total_flos": 3422348038450698240, "step": 891500 }, { "loss": 2.8325, "learning_rate": 2.849118509475973e-06, "epoch": 3.5869027360392, "total_flos": 3422717021071595520, "step": 891600 }, { "loss": 2.865, "learning_rate": 2.8483073840800417e-06, "epoch": 3.5873050355833946, "total_flos": 3423084213803857920, "step": 891700 }, { "loss": 2.8275, "learning_rate": 2.8474962586841117e-06, "epoch": 3.587707335127589, "total_flos": 3423476013521418240, "step": 891800 }, { "loss": 2.8425, "learning_rate": 2.8466851332881813e-06, "epoch": 3.588109634671784, "total_flos": 3423860011024128000, "step": 891900 }, { "loss": 2.8825, "learning_rate": 2.8458740078922505e-06, "epoch": 3.588511934215979, "total_flos": 3424220235406571520, "step": 892000 }, { "loss": 2.8975, "learning_rate": 2.84506288249632e-06, "epoch": 3.588914233760173, "total_flos": 3424606660146984960, "step": 892100 }, { "loss": 2.9075, "learning_rate": 2.8442517571003893e-06, "epoch": 3.589316533304368, "total_flos": 3424983928305776640, "step": 892200 }, { "loss": 2.8825, "learning_rate": 2.843440631704459e-06, "epoch": 3.5897188328485625, "total_flos": 3425357292701521920, "step": 892300 }, { "loss": 2.8425, "learning_rate": 2.842629506308528e-06, "epoch": 3.590121132392757, "total_flos": 3425756405999646720, "step": 892400 }, { "loss": 2.8825, "learning_rate": 2.8418183809125978e-06, "epoch": 3.5905234319369517, "total_flos": 3426152778696775680, "step": 892500 }, { "loss": 2.83, "learning_rate": 2.841007255516667e-06, "epoch": 3.5909257314811462, "total_flos": 3426543845462906880, "step": 892600 }, { "loss": 2.825, "learning_rate": 2.8401961301207366e-06, "epoch": 3.591328031025341, "total_flos": 3426936070079846400, "step": 892700 }, { "loss": 2.8425, "learning_rate": 2.8393850047248058e-06, "epoch": 3.5917303305695354, "total_flos": 3427311596151183360, "step": 892800 }, { "loss": 2.82, "learning_rate": 2.8385738793288754e-06, "epoch": 3.59213263011373, "total_flos": 3427686256490035200, "step": 892900 }, { "loss": 2.875, "learning_rate": 2.8377627539329446e-06, "epoch": 3.5925349296579245, "total_flos": 3428074115265853440, "step": 893000 }, { "loss": 2.85, "learning_rate": 2.836951628537014e-06, "epoch": 3.5929372292021196, "total_flos": 3428475278703482880, "step": 893100 }, { "loss": 2.8175, "learning_rate": 2.8361405031410834e-06, "epoch": 3.5933395287463137, "total_flos": 3428846454867425280, "step": 893200 }, { "loss": 2.86, "learning_rate": 2.835329377745153e-06, "epoch": 3.5937418282905087, "total_flos": 3429227764881561600, "step": 893300 }, { "loss": 2.94, "learning_rate": 2.8345182523492222e-06, "epoch": 3.5941441278347033, "total_flos": 3429608256964392960, "step": 893400 }, { "loss": 2.84, "learning_rate": 2.833707126953292e-06, "epoch": 3.594546427378898, "total_flos": 3429976272939202560, "step": 893500 }, { "loss": 2.835, "learning_rate": 2.832896001557361e-06, "epoch": 3.5949487269230924, "total_flos": 3430364838110238720, "step": 893600 }, { "loss": 2.8925, "learning_rate": 2.8320848761614307e-06, "epoch": 3.595351026467287, "total_flos": 3430752590661212160, "step": 893700 }, { "loss": 2.825, "learning_rate": 2.8312737507655e-06, "epoch": 3.5957533260114816, "total_flos": 3431125131814410240, "step": 893800 }, { "loss": 2.885, "learning_rate": 2.8304626253695695e-06, "epoch": 3.596155625555676, "total_flos": 3431502793005127680, "step": 893900 }, { "loss": 2.795, "learning_rate": 2.8296514999736387e-06, "epoch": 3.5965579250998707, "total_flos": 3431882541514045440, "step": 894000 }, { "loss": 2.87, "learning_rate": 2.8288403745777083e-06, "epoch": 3.5969602246440653, "total_flos": 3432261912924764160, "step": 894100 }, { "loss": 2.8975, "learning_rate": 2.8280292491817775e-06, "epoch": 3.5973625241882603, "total_flos": 3432637359327467520, "step": 894200 }, { "loss": 2.93, "learning_rate": 2.827218123785847e-06, "epoch": 3.597764823732455, "total_flos": 3433019710345082880, "step": 894300 }, { "loss": 2.8775, "learning_rate": 2.8264069983899163e-06, "epoch": 3.5981671232766494, "total_flos": 3433416810682398720, "step": 894400 }, { "loss": 2.85, "learning_rate": 2.825595872993986e-06, "epoch": 3.598569422820844, "total_flos": 3433806841756293120, "step": 894500 }, { "loss": 2.815, "learning_rate": 2.824784747598055e-06, "epoch": 3.5989717223650386, "total_flos": 3434201424564787200, "step": 894600 }, { "loss": 2.8425, "learning_rate": 2.8239736222021247e-06, "epoch": 3.599374021909233, "total_flos": 3434596448206387200, "step": 894700 }, { "loss": 2.8525, "learning_rate": 2.823162496806194e-06, "epoch": 3.5997763214534277, "total_flos": 3434981895678228480, "step": 894800 }, { "loss": 2.83, "learning_rate": 2.8223513714102635e-06, "epoch": 3.6001786209976223, "total_flos": 3435372829663303680, "step": 894900 }, { "loss": 2.855, "learning_rate": 2.8215402460143327e-06, "epoch": 3.600580920541817, "total_flos": 3435772659979130880, "step": 895000 }, { "loss": 2.865, "learning_rate": 2.8207291206184024e-06, "epoch": 3.600983220086012, "total_flos": 3436149301411338240, "step": 895100 }, { "loss": 2.86, "learning_rate": 2.8199179952224716e-06, "epoch": 3.601385519630206, "total_flos": 3436542885706291200, "step": 895200 }, { "loss": 2.925, "learning_rate": 2.819106869826541e-06, "epoch": 3.601787819174401, "total_flos": 3436933740022732800, "step": 895300 }, { "loss": 2.8225, "learning_rate": 2.8182957444306104e-06, "epoch": 3.6021901187185956, "total_flos": 3437323813586565120, "step": 895400 }, { "loss": 2.835, "learning_rate": 2.81748461903468e-06, "epoch": 3.60259241826279, "total_flos": 3437705633479956480, "step": 895500 }, { "loss": 2.8825, "learning_rate": 2.816673493638749e-06, "epoch": 3.6029947178069848, "total_flos": 3438095250276956160, "step": 895600 }, { "loss": 2.85, "learning_rate": 2.815862368242819e-06, "epoch": 3.6033970173511793, "total_flos": 3438481319164139520, "step": 895700 }, { "loss": 2.87, "learning_rate": 2.815051242846888e-06, "epoch": 3.603799316895374, "total_flos": 3438855713940879360, "step": 895800 }, { "loss": 2.8525, "learning_rate": 2.8142401174509576e-06, "epoch": 3.6042016164395685, "total_flos": 3439242186482472960, "step": 895900 }, { "loss": 2.84, "learning_rate": 2.813428992055027e-06, "epoch": 3.604603915983763, "total_flos": 3439635961982146560, "step": 896000 }, { "loss": 2.7925, "learning_rate": 2.8126178666590964e-06, "epoch": 3.6050062155279576, "total_flos": 3440023587063306240, "step": 896100 }, { "loss": 2.815, "learning_rate": 2.8118067412631656e-06, "epoch": 3.6054085150721527, "total_flos": 3440412848007075840, "step": 896200 }, { "loss": 2.765, "learning_rate": 2.8109956158672353e-06, "epoch": 3.605810814616347, "total_flos": 3440809087923148800, "step": 896300 }, { "loss": 2.885, "learning_rate": 2.8101844904713044e-06, "epoch": 3.606213114160542, "total_flos": 3441208514584565760, "step": 896400 }, { "loss": 2.8475, "learning_rate": 2.809373365075374e-06, "epoch": 3.6066154137047364, "total_flos": 3441586377602488320, "step": 896500 }, { "loss": 2.8075, "learning_rate": 2.8085622396794437e-06, "epoch": 3.607017713248931, "total_flos": 3441973572473026560, "step": 896600 }, { "loss": 2.865, "learning_rate": 2.807751114283513e-06, "epoch": 3.6074200127931255, "total_flos": 3442358855296358400, "step": 896700 }, { "loss": 2.87, "learning_rate": 2.8069399888875825e-06, "epoch": 3.60782231233732, "total_flos": 3442745582777579520, "step": 896800 }, { "loss": 2.885, "learning_rate": 2.8061288634916517e-06, "epoch": 3.6082246118815147, "total_flos": 3443138168558991360, "step": 896900 }, { "loss": 2.8675, "learning_rate": 2.8053177380957213e-06, "epoch": 3.6086269114257092, "total_flos": 3443534955533015040, "step": 897000 }, { "loss": 2.9225, "learning_rate": 2.8045066126997905e-06, "epoch": 3.609029210969904, "total_flos": 3443922288495851520, "step": 897100 }, { "loss": 2.835, "learning_rate": 2.80369548730386e-06, "epoch": 3.6094315105140984, "total_flos": 3444287319552522240, "step": 897200 }, { "loss": 2.8675, "learning_rate": 2.8028843619079293e-06, "epoch": 3.6098338100582934, "total_flos": 3444664821405972480, "step": 897300 }, { "loss": 2.8125, "learning_rate": 2.802073236511999e-06, "epoch": 3.610236109602488, "total_flos": 3445047193668556800, "step": 897400 }, { "loss": 2.865, "learning_rate": 2.801262111116068e-06, "epoch": 3.6106384091466825, "total_flos": 3445438749068974080, "step": 897500 }, { "loss": 2.8875, "learning_rate": 2.8004509857201378e-06, "epoch": 3.611040708690877, "total_flos": 3445825205676840960, "step": 897600 }, { "loss": 2.8175, "learning_rate": 2.799639860324207e-06, "epoch": 3.6114430082350717, "total_flos": 3446224340219934720, "step": 897700 }, { "loss": 2.8675, "learning_rate": 2.7988287349282766e-06, "epoch": 3.6118453077792663, "total_flos": 3446618758379919360, "step": 897800 }, { "loss": 2.915, "learning_rate": 2.7980176095323458e-06, "epoch": 3.612247607323461, "total_flos": 3446999245151508480, "step": 897900 }, { "loss": 2.8475, "learning_rate": 2.7972064841364154e-06, "epoch": 3.6126499068676554, "total_flos": 3447389350582794240, "step": 898000 }, { "loss": 2.875, "learning_rate": 2.7963953587404846e-06, "epoch": 3.61305220641185, "total_flos": 3447771234211092480, "step": 898100 }, { "loss": 2.8675, "learning_rate": 2.795584233344554e-06, "epoch": 3.613454505956045, "total_flos": 3448134804676147200, "step": 898200 }, { "loss": 2.8775, "learning_rate": 2.7947731079486234e-06, "epoch": 3.613856805500239, "total_flos": 3448515700413388800, "step": 898300 }, { "loss": 2.8375, "learning_rate": 2.793961982552693e-06, "epoch": 3.614259105044434, "total_flos": 3448893696212367360, "step": 898400 }, { "loss": 2.8425, "learning_rate": 2.7931508571567622e-06, "epoch": 3.6146614045886287, "total_flos": 3449283870689802240, "step": 898500 }, { "loss": 2.8675, "learning_rate": 2.792339731760832e-06, "epoch": 3.6150637041328233, "total_flos": 3449673275037112320, "step": 898600 }, { "loss": 2.855, "learning_rate": 2.791528606364901e-06, "epoch": 3.615466003677018, "total_flos": 3450060379616532480, "step": 898700 }, { "loss": 2.8475, "learning_rate": 2.7907174809689707e-06, "epoch": 3.6158683032212124, "total_flos": 3450436813910292480, "step": 898800 }, { "loss": 2.82, "learning_rate": 2.78990635557304e-06, "epoch": 3.616270602765407, "total_flos": 3450844674824386560, "step": 898900 }, { "loss": 2.8825, "learning_rate": 2.7890952301771095e-06, "epoch": 3.6166729023096016, "total_flos": 3451237770485053440, "step": 899000 }, { "loss": 2.8675, "learning_rate": 2.7882841047811787e-06, "epoch": 3.617075201853796, "total_flos": 3451631083906652160, "step": 899100 }, { "loss": 2.8125, "learning_rate": 2.7874729793852483e-06, "epoch": 3.6174775013979907, "total_flos": 3452011289182402560, "step": 899200 }, { "loss": 2.8275, "learning_rate": 2.7866618539893175e-06, "epoch": 3.6178798009421858, "total_flos": 3452383554151004160, "step": 899300 }, { "loss": 2.8575, "learning_rate": 2.785850728593387e-06, "epoch": 3.61828210048638, "total_flos": 3452758761547806720, "step": 899400 }, { "loss": 2.8475, "learning_rate": 2.7850396031974563e-06, "epoch": 3.618684400030575, "total_flos": 3453157434012825600, "step": 899500 }, { "loss": 2.8275, "learning_rate": 2.784228477801526e-06, "epoch": 3.6190866995747695, "total_flos": 3453532349291304960, "step": 899600 }, { "loss": 2.845, "learning_rate": 2.783417352405595e-06, "epoch": 3.619488999118964, "total_flos": 3453918965236439040, "step": 899700 }, { "loss": 2.86, "learning_rate": 2.7826062270096647e-06, "epoch": 3.6198912986631586, "total_flos": 3454297125683927040, "step": 899800 }, { "loss": 2.805, "learning_rate": 2.781795101613734e-06, "epoch": 3.620293598207353, "total_flos": 3454681484351109120, "step": 899900 }, { "loss": 2.7675, "learning_rate": 2.7809839762178035e-06, "epoch": 3.6206958977515478, "total_flos": 3455077108163082240, "step": 900000 }, { "loss": 2.8725, "learning_rate": 2.7801728508218727e-06, "epoch": 3.6210981972957423, "total_flos": 3455466995833436160, "step": 900100 }, { "loss": 2.8725, "learning_rate": 2.7793617254259424e-06, "epoch": 3.6215004968399374, "total_flos": 3455858923020810240, "step": 900200 }, { "loss": 2.8325, "learning_rate": 2.7785506000300116e-06, "epoch": 3.6219027963841315, "total_flos": 3456257457393530880, "step": 900300 }, { "loss": 2.805, "learning_rate": 2.777739474634081e-06, "epoch": 3.6223050959283265, "total_flos": 3456645751691212800, "step": 900400 }, { "loss": 2.8225, "learning_rate": 2.7769283492381504e-06, "epoch": 3.622707395472521, "total_flos": 3457045534205859840, "step": 900500 }, { "loss": 2.86, "learning_rate": 2.77611722384222e-06, "epoch": 3.6231096950167156, "total_flos": 3457428772200929280, "step": 900600 }, { "loss": 2.8225, "learning_rate": 2.775306098446289e-06, "epoch": 3.62351199456091, "total_flos": 3457826127477872640, "step": 900700 }, { "loss": 2.8525, "learning_rate": 2.774494973050359e-06, "epoch": 3.623914294105105, "total_flos": 3458210996024309760, "step": 900800 }, { "loss": 2.925, "learning_rate": 2.773683847654428e-06, "epoch": 3.6243165936492994, "total_flos": 3458591355326085120, "step": 900900 }, { "loss": 2.81, "learning_rate": 2.7728727222584976e-06, "epoch": 3.624718893193494, "total_flos": 3458978443971778560, "step": 901000 }, { "loss": 2.8325, "learning_rate": 2.772061596862567e-06, "epoch": 3.6251211927376885, "total_flos": 3459366424906168320, "step": 901100 }, { "loss": 2.8525, "learning_rate": 2.7712504714666364e-06, "epoch": 3.625523492281883, "total_flos": 3459762409882613760, "step": 901200 }, { "loss": 2.87, "learning_rate": 2.7704393460707065e-06, "epoch": 3.625925791826078, "total_flos": 3460147682083461120, "step": 901300 }, { "loss": 2.845, "learning_rate": 2.7696282206747753e-06, "epoch": 3.6263280913702722, "total_flos": 3460534282094868480, "step": 901400 }, { "loss": 2.8225, "learning_rate": 2.7688170952788453e-06, "epoch": 3.6267303909144673, "total_flos": 3460925003630254080, "step": 901500 }, { "loss": 2.83, "learning_rate": 2.768005969882914e-06, "epoch": 3.627132690458662, "total_flos": 3461310334254766080, "step": 901600 }, { "loss": 2.89, "learning_rate": 2.767194844486984e-06, "epoch": 3.6275349900028564, "total_flos": 3461698644486174720, "step": 901700 }, { "loss": 2.8575, "learning_rate": 2.766383719091053e-06, "epoch": 3.627937289547051, "total_flos": 3462090709765847040, "step": 901800 }, { "loss": 2.8075, "learning_rate": 2.765572593695123e-06, "epoch": 3.6283395890912455, "total_flos": 3462486859390801920, "step": 901900 }, { "loss": 2.8525, "learning_rate": 2.7647614682991917e-06, "epoch": 3.62874188863544, "total_flos": 3462867717949347840, "step": 902000 }, { "loss": 2.81, "learning_rate": 2.7639503429032617e-06, "epoch": 3.6291441881796347, "total_flos": 3463242649161553920, "step": 902100 }, { "loss": 2.845, "learning_rate": 2.7631392175073305e-06, "epoch": 3.6295464877238293, "total_flos": 3463624272538982400, "step": 902200 }, { "loss": 2.865, "learning_rate": 2.7623280921114006e-06, "epoch": 3.629948787268024, "total_flos": 3464034220771276800, "step": 902300 }, { "loss": 2.8875, "learning_rate": 2.7615169667154693e-06, "epoch": 3.630351086812219, "total_flos": 3464431623849400320, "step": 902400 }, { "loss": 2.8625, "learning_rate": 2.7607058413195394e-06, "epoch": 3.630753386356413, "total_flos": 3464818537224099840, "step": 902500 }, { "loss": 2.8275, "learning_rate": 2.7598947159236086e-06, "epoch": 3.631155685900608, "total_flos": 3465213369660979200, "step": 902600 }, { "loss": 2.7975, "learning_rate": 2.759083590527678e-06, "epoch": 3.6315579854448026, "total_flos": 3465599481038100480, "step": 902700 }, { "loss": 2.87, "learning_rate": 2.7582724651317474e-06, "epoch": 3.631960284988997, "total_flos": 3465998222549268480, "step": 902800 }, { "loss": 2.8625, "learning_rate": 2.757461339735817e-06, "epoch": 3.6323625845331917, "total_flos": 3466384286125209600, "step": 902900 }, { "loss": 2.8525, "learning_rate": 2.756650214339886e-06, "epoch": 3.6327648840773863, "total_flos": 3466770073516554240, "step": 903000 }, { "loss": 2.86, "learning_rate": 2.755839088943956e-06, "epoch": 3.633167183621581, "total_flos": 3467143740653107200, "step": 903100 }, { "loss": 2.7975, "learning_rate": 2.755027963548025e-06, "epoch": 3.6335694831657754, "total_flos": 3467525762373703680, "step": 903200 }, { "loss": 2.8125, "learning_rate": 2.7542168381520946e-06, "epoch": 3.6339717827099705, "total_flos": 3467890241061181440, "step": 903300 }, { "loss": 2.82, "learning_rate": 2.753405712756164e-06, "epoch": 3.6343740822541646, "total_flos": 3468264752685250560, "step": 903400 }, { "loss": 2.885, "learning_rate": 2.7525945873602334e-06, "epoch": 3.6347763817983596, "total_flos": 3468645951163299840, "step": 903500 }, { "loss": 2.8575, "learning_rate": 2.7517834619643026e-06, "epoch": 3.635178681342554, "total_flos": 3469050439438571520, "step": 903600 }, { "loss": 2.825, "learning_rate": 2.7509723365683723e-06, "epoch": 3.6355809808867487, "total_flos": 3469437995473582080, "step": 903700 }, { "loss": 2.875, "learning_rate": 2.7501612111724415e-06, "epoch": 3.6359832804309433, "total_flos": 3469817866141071360, "step": 903800 }, { "loss": 2.82, "learning_rate": 2.749350085776511e-06, "epoch": 3.636385579975138, "total_flos": 3470205863009187840, "step": 903900 }, { "loss": 2.835, "learning_rate": 2.7485389603805803e-06, "epoch": 3.6367878795193325, "total_flos": 3470590944005314560, "step": 904000 }, { "loss": 2.8475, "learning_rate": 2.74772783498465e-06, "epoch": 3.637190179063527, "total_flos": 3471004498571089920, "step": 904100 }, { "loss": 2.87, "learning_rate": 2.746916709588719e-06, "epoch": 3.6375924786077216, "total_flos": 3471391024225105920, "step": 904200 }, { "loss": 2.8375, "learning_rate": 2.7461055841927887e-06, "epoch": 3.637994778151916, "total_flos": 3471781108411422720, "step": 904300 }, { "loss": 2.825, "learning_rate": 2.745294458796858e-06, "epoch": 3.638397077696111, "total_flos": 3472170767698360320, "step": 904400 }, { "loss": 2.865, "learning_rate": 2.7444833334009275e-06, "epoch": 3.6387993772403053, "total_flos": 3472560745659832320, "step": 904500 }, { "loss": 2.88, "learning_rate": 2.7436722080049967e-06, "epoch": 3.6392016767845003, "total_flos": 3472959439369820160, "step": 904600 }, { "loss": 2.84, "learning_rate": 2.7428610826090663e-06, "epoch": 3.639603976328695, "total_flos": 3473325973508044800, "step": 904700 }, { "loss": 2.855, "learning_rate": 2.7420499572131355e-06, "epoch": 3.6400062758728895, "total_flos": 3473715930224547840, "step": 904800 }, { "loss": 2.8375, "learning_rate": 2.741238831817205e-06, "epoch": 3.640408575417084, "total_flos": 3474098626472908800, "step": 904900 }, { "loss": 2.86, "learning_rate": 2.7404277064212743e-06, "epoch": 3.6408108749612786, "total_flos": 3474488646924318720, "step": 905000 }, { "loss": 2.8425, "learning_rate": 2.739616581025344e-06, "epoch": 3.641213174505473, "total_flos": 3474875788682434560, "step": 905100 }, { "loss": 2.8525, "learning_rate": 2.738805455629413e-06, "epoch": 3.641615474049668, "total_flos": 3475243246976808960, "step": 905200 }, { "loss": 2.8825, "learning_rate": 2.7379943302334828e-06, "epoch": 3.6420177735938624, "total_flos": 3475610094478325760, "step": 905300 }, { "loss": 2.825, "learning_rate": 2.737183204837552e-06, "epoch": 3.642420073138057, "total_flos": 3475998086035200000, "step": 905400 }, { "loss": 2.8425, "learning_rate": 2.7363720794416216e-06, "epoch": 3.642822372682252, "total_flos": 3476384781648967680, "step": 905500 }, { "loss": 2.8425, "learning_rate": 2.735560954045691e-06, "epoch": 3.6432246722264465, "total_flos": 3476747805056071680, "step": 905600 }, { "loss": 2.895, "learning_rate": 2.7347498286497604e-06, "epoch": 3.643626971770641, "total_flos": 3477131823803750400, "step": 905700 }, { "loss": 2.8425, "learning_rate": 2.7339387032538296e-06, "epoch": 3.6440292713148357, "total_flos": 3477537124699084800, "step": 905800 }, { "loss": 2.81, "learning_rate": 2.7331275778578992e-06, "epoch": 3.6444315708590302, "total_flos": 3477923140473845760, "step": 905900 }, { "loss": 2.8475, "learning_rate": 2.7323164524619684e-06, "epoch": 3.644833870403225, "total_flos": 3478317362117867520, "step": 906000 }, { "loss": 2.8725, "learning_rate": 2.731505327066038e-06, "epoch": 3.6452361699474194, "total_flos": 3478688931313735680, "step": 906100 }, { "loss": 2.8325, "learning_rate": 2.7306942016701077e-06, "epoch": 3.645638469491614, "total_flos": 3479078771182909440, "step": 906200 }, { "loss": 2.8575, "learning_rate": 2.729883076274177e-06, "epoch": 3.6460407690358085, "total_flos": 3479455391370147840, "step": 906300 }, { "loss": 2.7875, "learning_rate": 2.7290719508782465e-06, "epoch": 3.6464430685800036, "total_flos": 3479847546940938240, "step": 906400 }, { "loss": 2.8625, "learning_rate": 2.7282608254823157e-06, "epoch": 3.6468453681241977, "total_flos": 3480225415270103040, "step": 906500 }, { "loss": 2.8375, "learning_rate": 2.7274497000863853e-06, "epoch": 3.6472476676683927, "total_flos": 3480616365188904960, "step": 906600 }, { "loss": 2.86, "learning_rate": 2.7266385746904545e-06, "epoch": 3.6476499672125873, "total_flos": 3481001658634721280, "step": 906700 }, { "loss": 2.8525, "learning_rate": 2.725827449294524e-06, "epoch": 3.648052266756782, "total_flos": 3481368654851020800, "step": 906800 }, { "loss": 2.8775, "learning_rate": 2.7250163238985933e-06, "epoch": 3.6484545663009764, "total_flos": 3481756869480069120, "step": 906900 }, { "loss": 2.87, "learning_rate": 2.724205198502663e-06, "epoch": 3.648856865845171, "total_flos": 3482134312909854720, "step": 907000 }, { "loss": 2.82, "learning_rate": 2.723394073106732e-06, "epoch": 3.6492591653893656, "total_flos": 3482534615926241280, "step": 907100 }, { "loss": 2.865, "learning_rate": 2.7225829477108017e-06, "epoch": 3.64966146493356, "total_flos": 3482925788917217280, "step": 907200 }, { "loss": 2.8575, "learning_rate": 2.721771822314871e-06, "epoch": 3.6500637644777547, "total_flos": 3483314269108377600, "step": 907300 }, { "loss": 2.82, "learning_rate": 2.7209606969189406e-06, "epoch": 3.6504660640219493, "total_flos": 3483709080300288000, "step": 907400 }, { "loss": 2.795, "learning_rate": 2.7201495715230098e-06, "epoch": 3.6508683635661443, "total_flos": 3484095324458465280, "step": 907500 }, { "loss": 2.785, "learning_rate": 2.7193384461270794e-06, "epoch": 3.6512706631103384, "total_flos": 3484488080199628800, "step": 907600 }, { "loss": 2.8975, "learning_rate": 2.7185273207311486e-06, "epoch": 3.6516729626545334, "total_flos": 3484882169062594560, "step": 907700 }, { "loss": 2.7725, "learning_rate": 2.717716195335218e-06, "epoch": 3.652075262198728, "total_flos": 3485265237097912320, "step": 907800 }, { "loss": 2.8375, "learning_rate": 2.7169050699392874e-06, "epoch": 3.6524775617429226, "total_flos": 3485639653119621120, "step": 907900 }, { "loss": 2.795, "learning_rate": 2.716093944543357e-06, "epoch": 3.652879861287117, "total_flos": 3486012507636111360, "step": 908000 }, { "loss": 2.8525, "learning_rate": 2.715282819147426e-06, "epoch": 3.6532821608313117, "total_flos": 3486415530008524800, "step": 908100 }, { "loss": 2.84, "learning_rate": 2.714471693751496e-06, "epoch": 3.6536844603755063, "total_flos": 3486792644141291520, "step": 908200 }, { "loss": 2.895, "learning_rate": 2.713660568355565e-06, "epoch": 3.654086759919701, "total_flos": 3487170098193561600, "step": 908300 }, { "loss": 2.8425, "learning_rate": 2.7128494429596346e-06, "epoch": 3.6544890594638955, "total_flos": 3487559226356275200, "step": 908400 }, { "loss": 2.8475, "learning_rate": 2.712038317563704e-06, "epoch": 3.65489135900809, "total_flos": 3487939962756249600, "step": 908500 }, { "loss": 2.8875, "learning_rate": 2.7112271921677734e-06, "epoch": 3.655293658552285, "total_flos": 3488318255984793600, "step": 908600 }, { "loss": 2.8775, "learning_rate": 2.7104160667718426e-06, "epoch": 3.6556959580964796, "total_flos": 3488704797572536320, "step": 908700 }, { "loss": 2.84, "learning_rate": 2.7096049413759123e-06, "epoch": 3.656098257640674, "total_flos": 3489105690136811520, "step": 908800 }, { "loss": 2.85, "learning_rate": 2.7087938159799815e-06, "epoch": 3.6565005571848688, "total_flos": 3489493368330393600, "step": 908900 }, { "loss": 2.865, "learning_rate": 2.707982690584051e-06, "epoch": 3.6569028567290633, "total_flos": 3489860401725388800, "step": 909000 }, { "loss": 2.9025, "learning_rate": 2.7071715651881203e-06, "epoch": 3.657305156273258, "total_flos": 3490247766555678720, "step": 909100 }, { "loss": 2.845, "learning_rate": 2.70636043979219e-06, "epoch": 3.6577074558174525, "total_flos": 3490618560310179840, "step": 909200 }, { "loss": 2.79, "learning_rate": 2.705549314396259e-06, "epoch": 3.658109755361647, "total_flos": 3491000502362142720, "step": 909300 }, { "loss": 2.855, "learning_rate": 2.7047381890003287e-06, "epoch": 3.6585120549058416, "total_flos": 3491358267639429120, "step": 909400 }, { "loss": 2.835, "learning_rate": 2.703927063604398e-06, "epoch": 3.6589143544500367, "total_flos": 3491737585937725440, "step": 909500 }, { "loss": 2.85, "learning_rate": 2.7031159382084675e-06, "epoch": 3.659316653994231, "total_flos": 3492121519705528320, "step": 909600 }, { "loss": 2.84, "learning_rate": 2.7023048128125367e-06, "epoch": 3.659718953538426, "total_flos": 3492495218709534720, "step": 909700 }, { "loss": 2.8275, "learning_rate": 2.7014936874166063e-06, "epoch": 3.6601212530826204, "total_flos": 3492888356860139520, "step": 909800 }, { "loss": 2.865, "learning_rate": 2.7006825620206755e-06, "epoch": 3.660523552626815, "total_flos": 3493266724446074880, "step": 909900 }, { "loss": 2.8075, "learning_rate": 2.699871436624745e-06, "epoch": 3.6609258521710095, "total_flos": 3493647466157291520, "step": 910000 }, { "loss": 2.81, "learning_rate": 2.6990603112288143e-06, "epoch": 3.661328151715204, "total_flos": 3494039478324541440, "step": 910100 }, { "loss": 2.8125, "learning_rate": 2.698249185832884e-06, "epoch": 3.6617304512593987, "total_flos": 3494420368750540800, "step": 910200 }, { "loss": 2.8875, "learning_rate": 2.697438060436953e-06, "epoch": 3.6621327508035932, "total_flos": 3494803659858032640, "step": 910300 }, { "loss": 2.8575, "learning_rate": 2.6966269350410228e-06, "epoch": 3.662535050347788, "total_flos": 3495195098411120640, "step": 910400 }, { "loss": 2.865, "learning_rate": 2.695815809645092e-06, "epoch": 3.6629373498919824, "total_flos": 3495580243142154240, "step": 910500 }, { "loss": 2.875, "learning_rate": 2.6950046842491616e-06, "epoch": 3.6633396494361774, "total_flos": 3495964506206976000, "step": 910600 }, { "loss": 2.785, "learning_rate": 2.694193558853231e-06, "epoch": 3.6637419489803715, "total_flos": 3496351403647948800, "step": 910700 }, { "loss": 2.86, "learning_rate": 2.6933824334573004e-06, "epoch": 3.6641442485245665, "total_flos": 3496741949912340480, "step": 910800 }, { "loss": 2.8325, "learning_rate": 2.69257130806137e-06, "epoch": 3.664546548068761, "total_flos": 3497125734965360640, "step": 910900 }, { "loss": 2.795, "learning_rate": 2.6917601826654392e-06, "epoch": 3.6649488476129557, "total_flos": 3497503045614090240, "step": 911000 }, { "loss": 2.8025, "learning_rate": 2.690949057269509e-06, "epoch": 3.6653511471571503, "total_flos": 3497884010397480960, "step": 911100 }, { "loss": 2.8525, "learning_rate": 2.690137931873578e-06, "epoch": 3.665753446701345, "total_flos": 3498273914001561600, "step": 911200 }, { "loss": 2.8575, "learning_rate": 2.689326806477648e-06, "epoch": 3.6661557462455394, "total_flos": 3498639736433326080, "step": 911300 }, { "loss": 2.92, "learning_rate": 2.688515681081717e-06, "epoch": 3.666558045789734, "total_flos": 3499037734370580480, "step": 911400 }, { "loss": 2.8825, "learning_rate": 2.687704555685787e-06, "epoch": 3.666960345333929, "total_flos": 3499434197358827520, "step": 911500 }, { "loss": 2.8325, "learning_rate": 2.6868934302898557e-06, "epoch": 3.667362644878123, "total_flos": 3499832577705523200, "step": 911600 }, { "loss": 2.835, "learning_rate": 2.6860823048939257e-06, "epoch": 3.667764944422318, "total_flos": 3500197778721945600, "step": 911700 }, { "loss": 2.8225, "learning_rate": 2.6852711794979945e-06, "epoch": 3.6681672439665127, "total_flos": 3500588755196958720, "step": 911800 }, { "loss": 2.8, "learning_rate": 2.6844600541020645e-06, "epoch": 3.6685695435107073, "total_flos": 3500981728699054080, "step": 911900 }, { "loss": 2.7775, "learning_rate": 2.6836489287061333e-06, "epoch": 3.668971843054902, "total_flos": 3501360255622256640, "step": 912000 }, { "loss": 2.9025, "learning_rate": 2.6828378033102033e-06, "epoch": 3.6693741425990964, "total_flos": 3501736493400053760, "step": 912100 }, { "loss": 2.87, "learning_rate": 2.682026677914272e-06, "epoch": 3.669776442143291, "total_flos": 3502126258911836160, "step": 912200 }, { "loss": 2.905, "learning_rate": 2.681215552518342e-06, "epoch": 3.6701787416874856, "total_flos": 3502500276590376960, "step": 912300 }, { "loss": 2.83, "learning_rate": 2.680404427122411e-06, "epoch": 3.67058104123168, "total_flos": 3502881262618736640, "step": 912400 }, { "loss": 2.8575, "learning_rate": 2.679593301726481e-06, "epoch": 3.6709833407758747, "total_flos": 3503264713063495680, "step": 912500 }, { "loss": 2.885, "learning_rate": 2.6787821763305498e-06, "epoch": 3.6713856403200698, "total_flos": 3503642682306263040, "step": 912600 }, { "loss": 2.855, "learning_rate": 2.67797105093462e-06, "epoch": 3.671787939864264, "total_flos": 3504016912434493440, "step": 912700 }, { "loss": 2.8475, "learning_rate": 2.6771599255386886e-06, "epoch": 3.672190239408459, "total_flos": 3504407872975779840, "step": 912800 }, { "loss": 2.8475, "learning_rate": 2.6763488001427586e-06, "epoch": 3.6725925389526535, "total_flos": 3504790112457308160, "step": 912900 }, { "loss": 2.87, "learning_rate": 2.6755376747468274e-06, "epoch": 3.672994838496848, "total_flos": 3505179585850767360, "step": 913000 }, { "loss": 2.8425, "learning_rate": 2.6747265493508974e-06, "epoch": 3.6733971380410426, "total_flos": 3505564077299005440, "step": 913100 }, { "loss": 2.885, "learning_rate": 2.673915423954966e-06, "epoch": 3.673799437585237, "total_flos": 3505949811577927680, "step": 913200 }, { "loss": 2.81, "learning_rate": 2.6731042985590362e-06, "epoch": 3.6742017371294318, "total_flos": 3506353216359782400, "step": 913300 }, { "loss": 2.7975, "learning_rate": 2.672293173163105e-06, "epoch": 3.6746040366736263, "total_flos": 3506730803193108480, "step": 913400 }, { "loss": 2.815, "learning_rate": 2.671482047767175e-06, "epoch": 3.675006336217821, "total_flos": 3507122517930792960, "step": 913500 }, { "loss": 2.8425, "learning_rate": 2.670670922371244e-06, "epoch": 3.6754086357620155, "total_flos": 3507508900181268480, "step": 913600 }, { "loss": 2.8475, "learning_rate": 2.669859796975314e-06, "epoch": 3.6758109353062105, "total_flos": 3507896461527521280, "step": 913700 }, { "loss": 2.8325, "learning_rate": 2.6690486715793826e-06, "epoch": 3.676213234850405, "total_flos": 3508270048995440640, "step": 913800 }, { "loss": 2.7725, "learning_rate": 2.6682375461834527e-06, "epoch": 3.6766155343945996, "total_flos": 3508629965325834240, "step": 913900 }, { "loss": 2.805, "learning_rate": 2.667426420787522e-06, "epoch": 3.6770178339387942, "total_flos": 3509020421299107840, "step": 914000 }, { "loss": 2.8425, "learning_rate": 2.6666152953915915e-06, "epoch": 3.677420133482989, "total_flos": 3509409560084305920, "step": 914100 }, { "loss": 2.805, "learning_rate": 2.6658041699956607e-06, "epoch": 3.6778224330271834, "total_flos": 3509804158826526720, "step": 914200 }, { "loss": 2.8175, "learning_rate": 2.6649930445997303e-06, "epoch": 3.678224732571378, "total_flos": 3510192585905264640, "step": 914300 }, { "loss": 2.8125, "learning_rate": 2.6641819192037995e-06, "epoch": 3.6786270321155725, "total_flos": 3510569609746913280, "step": 914400 }, { "loss": 2.87, "learning_rate": 2.663370793807869e-06, "epoch": 3.679029331659767, "total_flos": 3510956894908569600, "step": 914500 }, { "loss": 2.87, "learning_rate": 2.6625596684119383e-06, "epoch": 3.679431631203962, "total_flos": 3511339166257551360, "step": 914600 }, { "loss": 2.8775, "learning_rate": 2.661748543016008e-06, "epoch": 3.6798339307481562, "total_flos": 3511710289309071360, "step": 914700 }, { "loss": 2.885, "learning_rate": 2.660937417620077e-06, "epoch": 3.6802362302923513, "total_flos": 3512101196737935360, "step": 914800 }, { "loss": 2.84, "learning_rate": 2.6601262922241468e-06, "epoch": 3.680638529836546, "total_flos": 3512494334888540160, "step": 914900 }, { "loss": 2.8275, "learning_rate": 2.659315166828216e-06, "epoch": 3.6810408293807404, "total_flos": 3512876691217397760, "step": 915000 }, { "loss": 2.82, "learning_rate": 2.6585040414322856e-06, "epoch": 3.681443128924935, "total_flos": 3513252928995194880, "step": 915100 }, { "loss": 2.8625, "learning_rate": 2.6576929160363548e-06, "epoch": 3.6818454284691295, "total_flos": 3513653481639966720, "step": 915200 }, { "loss": 2.8175, "learning_rate": 2.6568817906404244e-06, "epoch": 3.682247728013324, "total_flos": 3514045047662868480, "step": 915300 }, { "loss": 2.83, "learning_rate": 2.6560706652444936e-06, "epoch": 3.6826500275575187, "total_flos": 3514432056639928320, "step": 915400 }, { "loss": 2.8375, "learning_rate": 2.655259539848563e-06, "epoch": 3.6830523271017133, "total_flos": 3514811215600957440, "step": 915500 }, { "loss": 2.8625, "learning_rate": 2.654448414452633e-06, "epoch": 3.683454626645908, "total_flos": 3515191744862484480, "step": 915600 }, { "loss": 2.835, "learning_rate": 2.653637289056702e-06, "epoch": 3.683856926190103, "total_flos": 3515575577716684800, "step": 915700 }, { "loss": 2.7975, "learning_rate": 2.6528261636607716e-06, "epoch": 3.684259225734297, "total_flos": 3515962060880762880, "step": 915800 }, { "loss": 2.84, "learning_rate": 2.652015038264841e-06, "epoch": 3.684661525278492, "total_flos": 3516363564237895680, "step": 915900 }, { "loss": 2.85, "learning_rate": 2.6512039128689105e-06, "epoch": 3.6850638248226866, "total_flos": 3516746217996318720, "step": 916000 }, { "loss": 2.855, "learning_rate": 2.6503927874729797e-06, "epoch": 3.685466124366881, "total_flos": 3517128802708592640, "step": 916100 }, { "loss": 2.89, "learning_rate": 2.6495816620770493e-06, "epoch": 3.6858684239110757, "total_flos": 3517529764319016960, "step": 916200 }, { "loss": 2.855, "learning_rate": 2.6487705366811185e-06, "epoch": 3.6862707234552703, "total_flos": 3517906522598553600, "step": 916300 }, { "loss": 2.835, "learning_rate": 2.647959411285188e-06, "epoch": 3.686673022999465, "total_flos": 3518289144489523200, "step": 916400 }, { "loss": 2.8525, "learning_rate": 2.6471482858892573e-06, "epoch": 3.6870753225436594, "total_flos": 3518683328954849280, "step": 916500 }, { "loss": 2.84, "learning_rate": 2.646337160493327e-06, "epoch": 3.687477622087854, "total_flos": 3519063454561966080, "step": 916600 }, { "loss": 2.7975, "learning_rate": 2.645526035097396e-06, "epoch": 3.6878799216320486, "total_flos": 3519438125523302400, "step": 916700 }, { "loss": 2.8425, "learning_rate": 2.6447149097014657e-06, "epoch": 3.6882822211762436, "total_flos": 3519838375427266560, "step": 916800 }, { "loss": 2.86, "learning_rate": 2.643903784305535e-06, "epoch": 3.688684520720438, "total_flos": 3520217523765811200, "step": 916900 }, { "loss": 2.825, "learning_rate": 2.6430926589096045e-06, "epoch": 3.6890868202646327, "total_flos": 3520609063232501760, "step": 917000 }, { "loss": 2.8575, "learning_rate": 2.6422815335136737e-06, "epoch": 3.6894891198088273, "total_flos": 3520990654742476800, "step": 917100 }, { "loss": 2.785, "learning_rate": 2.6414704081177433e-06, "epoch": 3.689891419353022, "total_flos": 3521368894858598400, "step": 917200 }, { "loss": 2.87, "learning_rate": 2.6406592827218125e-06, "epoch": 3.6902937188972165, "total_flos": 3521766393539082240, "step": 917300 }, { "loss": 2.845, "learning_rate": 2.639848157325882e-06, "epoch": 3.690696018441411, "total_flos": 3522162086397204480, "step": 917400 }, { "loss": 2.8625, "learning_rate": 2.6390370319299514e-06, "epoch": 3.6910983179856056, "total_flos": 3522530585695057920, "step": 917500 }, { "loss": 2.7875, "learning_rate": 2.638225906534021e-06, "epoch": 3.6915006175298, "total_flos": 3522920340584355840, "step": 917600 }, { "loss": 2.8375, "learning_rate": 2.63741478113809e-06, "epoch": 3.691902917073995, "total_flos": 3523312304950425600, "step": 917700 }, { "loss": 2.875, "learning_rate": 2.63660365574216e-06, "epoch": 3.6923052166181893, "total_flos": 3523686731594618880, "step": 917800 }, { "loss": 2.8275, "learning_rate": 2.635792530346229e-06, "epoch": 3.6927075161623844, "total_flos": 3524080262777149440, "step": 917900 }, { "loss": 2.8675, "learning_rate": 2.6349814049502986e-06, "epoch": 3.693109815706579, "total_flos": 3524459979418613760, "step": 918000 }, { "loss": 2.8225, "learning_rate": 2.634170279554368e-06, "epoch": 3.6935121152507735, "total_flos": 3524852214658037760, "step": 918100 }, { "loss": 2.805, "learning_rate": 2.6333591541584374e-06, "epoch": 3.693914414794968, "total_flos": 3525234725012920320, "step": 918200 }, { "loss": 2.825, "learning_rate": 2.6325480287625066e-06, "epoch": 3.6943167143391626, "total_flos": 3525615185228298240, "step": 918300 }, { "loss": 2.82, "learning_rate": 2.6317369033665762e-06, "epoch": 3.694719013883357, "total_flos": 3526004478039521280, "step": 918400 }, { "loss": 2.7725, "learning_rate": 2.6309257779706454e-06, "epoch": 3.695121313427552, "total_flos": 3526401413728327680, "step": 918500 }, { "loss": 2.81, "learning_rate": 2.630114652574715e-06, "epoch": 3.6955236129717464, "total_flos": 3526805487726704640, "step": 918600 }, { "loss": 2.8325, "learning_rate": 2.6293035271787843e-06, "epoch": 3.695925912515941, "total_flos": 3527184078384814080, "step": 918700 }, { "loss": 2.8375, "learning_rate": 2.628492401782854e-06, "epoch": 3.696328212060136, "total_flos": 3527578114135357440, "step": 918800 }, { "loss": 2.785, "learning_rate": 2.627681276386923e-06, "epoch": 3.69673051160433, "total_flos": 3527949752377374720, "step": 918900 }, { "loss": 2.82, "learning_rate": 2.6268701509909927e-06, "epoch": 3.697132811148525, "total_flos": 3528338769004001280, "step": 919000 }, { "loss": 2.865, "learning_rate": 2.626059025595062e-06, "epoch": 3.6975351106927197, "total_flos": 3528716743558010880, "step": 919100 }, { "loss": 2.805, "learning_rate": 2.6252479001991315e-06, "epoch": 3.6979374102369142, "total_flos": 3529084111561267200, "step": 919200 }, { "loss": 2.885, "learning_rate": 2.6244367748032007e-06, "epoch": 3.698339709781109, "total_flos": 3529462144538941440, "step": 919300 }, { "loss": 2.7975, "learning_rate": 2.6236256494072703e-06, "epoch": 3.6987420093253034, "total_flos": 3529844771741153280, "step": 919400 }, { "loss": 2.875, "learning_rate": 2.6228145240113395e-06, "epoch": 3.699144308869498, "total_flos": 3530214981259008000, "step": 919500 }, { "loss": 2.845, "learning_rate": 2.622003398615409e-06, "epoch": 3.6995466084136925, "total_flos": 3530607407703152640, "step": 919600 }, { "loss": 2.82, "learning_rate": 2.6211922732194783e-06, "epoch": 3.6999489079578876, "total_flos": 3530997539690649600, "step": 919700 }, { "loss": 2.8625, "learning_rate": 2.620381147823548e-06, "epoch": 3.7003512075020817, "total_flos": 3531369416938567680, "step": 919800 }, { "loss": 2.7975, "learning_rate": 2.619570022427617e-06, "epoch": 3.7007535070462767, "total_flos": 3531751220898232320, "step": 919900 }, { "loss": 2.835, "learning_rate": 2.6187588970316868e-06, "epoch": 3.7011558065904713, "total_flos": 3532113638823720960, "step": 920000 }, { "loss": 2.8025, "learning_rate": 2.617947771635756e-06, "epoch": 3.701558106134666, "total_flos": 3532495070996428800, "step": 920100 }, { "loss": 2.8625, "learning_rate": 2.6171366462398256e-06, "epoch": 3.7019604056788604, "total_flos": 3532870549266585600, "step": 920200 }, { "loss": 2.8775, "learning_rate": 2.616325520843895e-06, "epoch": 3.702362705223055, "total_flos": 3533250754542336000, "step": 920300 }, { "loss": 2.8275, "learning_rate": 2.6155143954479644e-06, "epoch": 3.7027650047672496, "total_flos": 3533626461195909120, "step": 920400 }, { "loss": 2.8875, "learning_rate": 2.614703270052034e-06, "epoch": 3.703167304311444, "total_flos": 3533993234340034560, "step": 920500 }, { "loss": 2.8275, "learning_rate": 2.613892144656103e-06, "epoch": 3.7035696038556387, "total_flos": 3534381061248399360, "step": 920600 }, { "loss": 2.855, "learning_rate": 2.613081019260173e-06, "epoch": 3.7039719033998333, "total_flos": 3534759285430794240, "step": 920700 }, { "loss": 2.8275, "learning_rate": 2.612269893864242e-06, "epoch": 3.7043742029440283, "total_flos": 3535139804069836800, "step": 920800 }, { "loss": 2.89, "learning_rate": 2.6114587684683116e-06, "epoch": 3.7047765024882224, "total_flos": 3535521878902855680, "step": 920900 }, { "loss": 2.85, "learning_rate": 2.610647643072381e-06, "epoch": 3.7051788020324175, "total_flos": 3535896820737546240, "step": 921000 }, { "loss": 2.855, "learning_rate": 2.6098365176764505e-06, "epoch": 3.705581101576612, "total_flos": 3536271066799503360, "step": 921100 }, { "loss": 2.86, "learning_rate": 2.6090253922805197e-06, "epoch": 3.7059834011208066, "total_flos": 3536663689759610880, "step": 921200 }, { "loss": 2.7975, "learning_rate": 2.6082142668845893e-06, "epoch": 3.706385700665001, "total_flos": 3537038185449953280, "step": 921300 }, { "loss": 2.805, "learning_rate": 2.6074031414886585e-06, "epoch": 3.7067880002091957, "total_flos": 3537430452556830720, "step": 921400 }, { "loss": 2.89, "learning_rate": 2.606592016092728e-06, "epoch": 3.7071902997533903, "total_flos": 3537807582623324160, "step": 921500 }, { "loss": 2.865, "learning_rate": 2.6057808906967973e-06, "epoch": 3.707592599297585, "total_flos": 3538198798104238080, "step": 921600 }, { "loss": 2.805, "learning_rate": 2.604969765300867e-06, "epoch": 3.7079948988417795, "total_flos": 3538578748440360960, "step": 921700 }, { "loss": 2.88, "learning_rate": 2.604158639904936e-06, "epoch": 3.708397198385974, "total_flos": 3538953222885734400, "step": 921800 }, { "loss": 2.815, "learning_rate": 2.6033475145090057e-06, "epoch": 3.708799497930169, "total_flos": 3539340725808322560, "step": 921900 }, { "loss": 2.85, "learning_rate": 2.602536389113075e-06, "epoch": 3.7092017974743636, "total_flos": 3539724117829416960, "step": 922000 }, { "loss": 2.8625, "learning_rate": 2.6017252637171445e-06, "epoch": 3.709604097018558, "total_flos": 3540112088141322240, "step": 922100 }, { "loss": 2.86, "learning_rate": 2.6009141383212137e-06, "epoch": 3.7100063965627528, "total_flos": 3540493998325831680, "step": 922200 }, { "loss": 2.8275, "learning_rate": 2.6001030129252833e-06, "epoch": 3.7104086961069473, "total_flos": 3540889590270351360, "step": 922300 }, { "loss": 2.88, "learning_rate": 2.5992918875293525e-06, "epoch": 3.710810995651142, "total_flos": 3541280943843563520, "step": 922400 }, { "loss": 2.835, "learning_rate": 2.598480762133422e-06, "epoch": 3.7112132951953365, "total_flos": 3541661239410432000, "step": 922500 }, { "loss": 2.8625, "learning_rate": 2.5976696367374914e-06, "epoch": 3.711615594739531, "total_flos": 3542053782701905920, "step": 922600 }, { "loss": 2.7925, "learning_rate": 2.5968585113415614e-06, "epoch": 3.7120178942837256, "total_flos": 3542433313449891840, "step": 922700 }, { "loss": 2.8425, "learning_rate": 2.59604738594563e-06, "epoch": 3.7124201938279207, "total_flos": 3542807054943836160, "step": 922800 }, { "loss": 2.85, "learning_rate": 2.5952362605497002e-06, "epoch": 3.712822493372115, "total_flos": 3543186213904865280, "step": 922900 }, { "loss": 2.7875, "learning_rate": 2.594425135153769e-06, "epoch": 3.71322479291631, "total_flos": 3543541732526684160, "step": 923000 }, { "loss": 2.835, "learning_rate": 2.593614009757839e-06, "epoch": 3.7136270924605044, "total_flos": 3543923079719516160, "step": 923100 }, { "loss": 2.8575, "learning_rate": 2.592802884361908e-06, "epoch": 3.714029392004699, "total_flos": 3544318183029749760, "step": 923200 }, { "loss": 2.79, "learning_rate": 2.591991758965978e-06, "epoch": 3.7144316915488935, "total_flos": 3544711140598118400, "step": 923300 }, { "loss": 2.86, "learning_rate": 2.5911806335700466e-06, "epoch": 3.714833991093088, "total_flos": 3545078349264107520, "step": 923400 }, { "loss": 2.81, "learning_rate": 2.5903695081741167e-06, "epoch": 3.7152362906372827, "total_flos": 3545458214620354560, "step": 923500 }, { "loss": 2.795, "learning_rate": 2.5895583827781854e-06, "epoch": 3.7156385901814772, "total_flos": 3545839721150453760, "step": 923600 }, { "loss": 2.84, "learning_rate": 2.5887472573822555e-06, "epoch": 3.716040889725672, "total_flos": 3546216755614586880, "step": 923700 }, { "loss": 2.845, "learning_rate": 2.5879361319863243e-06, "epoch": 3.7164431892698664, "total_flos": 3546600737183569920, "step": 923800 }, { "loss": 2.8525, "learning_rate": 2.5871250065903943e-06, "epoch": 3.7168454888140614, "total_flos": 3546990943528458240, "step": 923900 }, { "loss": 2.87, "learning_rate": 2.586313881194463e-06, "epoch": 3.7172477883582555, "total_flos": 3547377230176573440, "step": 924000 }, { "loss": 2.8375, "learning_rate": 2.585502755798533e-06, "epoch": 3.7176500879024506, "total_flos": 3547760739044997120, "step": 924100 }, { "loss": 2.84, "learning_rate": 2.584691630402602e-06, "epoch": 3.718052387446645, "total_flos": 3548144858706278400, "step": 924200 }, { "loss": 2.8975, "learning_rate": 2.583880505006672e-06, "epoch": 3.7184546869908397, "total_flos": 3548537401997752320, "step": 924300 }, { "loss": 2.8275, "learning_rate": 2.5830693796107407e-06, "epoch": 3.7188569865350343, "total_flos": 3548920974601082880, "step": 924400 }, { "loss": 2.8375, "learning_rate": 2.5822582542148107e-06, "epoch": 3.719259286079229, "total_flos": 3549309603507025920, "step": 924500 }, { "loss": 2.7975, "learning_rate": 2.5814471288188795e-06, "epoch": 3.7196615856234234, "total_flos": 3549699639892162560, "step": 924600 }, { "loss": 2.835, "learning_rate": 2.5806360034229496e-06, "epoch": 3.720063885167618, "total_flos": 3550076870872258560, "step": 924700 }, { "loss": 2.8225, "learning_rate": 2.5798248780270183e-06, "epoch": 3.7204661847118126, "total_flos": 3550466726675159040, "step": 924800 }, { "loss": 2.825, "learning_rate": 2.5790137526310884e-06, "epoch": 3.720868484256007, "total_flos": 3550847457763891200, "step": 924900 }, { "loss": 2.8925, "learning_rate": 2.578202627235158e-06, "epoch": 3.721270783800202, "total_flos": 3551239432752445440, "step": 925000 }, { "loss": 2.825, "learning_rate": 2.577391501839227e-06, "epoch": 3.7216730833443967, "total_flos": 3551614066535086080, "step": 925100 }, { "loss": 2.8975, "learning_rate": 2.576580376443297e-06, "epoch": 3.7220753828885913, "total_flos": 3551997240795248640, "step": 925200 }, { "loss": 2.7825, "learning_rate": 2.575769251047366e-06, "epoch": 3.722477682432786, "total_flos": 3552379676792739840, "step": 925300 }, { "loss": 2.835, "learning_rate": 2.5749581256514356e-06, "epoch": 3.7228799819769804, "total_flos": 3552761958764206080, "step": 925400 }, { "loss": 2.8425, "learning_rate": 2.574147000255505e-06, "epoch": 3.723282281521175, "total_flos": 3553137012134983680, "step": 925500 }, { "loss": 2.7925, "learning_rate": 2.5733358748595744e-06, "epoch": 3.7236845810653696, "total_flos": 3553537182370314240, "step": 925600 }, { "loss": 2.83, "learning_rate": 2.5725247494636436e-06, "epoch": 3.724086880609564, "total_flos": 3553932402527877120, "step": 925700 }, { "loss": 2.85, "learning_rate": 2.5717136240677133e-06, "epoch": 3.7244891801537587, "total_flos": 3554314387069777920, "step": 925800 }, { "loss": 2.8675, "learning_rate": 2.5709024986717824e-06, "epoch": 3.7248914796979538, "total_flos": 3554668869999360000, "step": 925900 }, { "loss": 2.815, "learning_rate": 2.570091373275852e-06, "epoch": 3.725293779242148, "total_flos": 3555052888747038720, "step": 926000 }, { "loss": 2.8275, "learning_rate": 2.5692802478799213e-06, "epoch": 3.725696078786343, "total_flos": 3555443381899008000, "step": 926100 }, { "loss": 2.88, "learning_rate": 2.568469122483991e-06, "epoch": 3.7260983783305375, "total_flos": 3555826747363891200, "step": 926200 }, { "loss": 2.8075, "learning_rate": 2.56765799708806e-06, "epoch": 3.726500677874732, "total_flos": 3556217676037724160, "step": 926300 }, { "loss": 2.8725, "learning_rate": 2.5668468716921297e-06, "epoch": 3.7269029774189266, "total_flos": 3556598885138257920, "step": 926400 }, { "loss": 2.8625, "learning_rate": 2.566035746296199e-06, "epoch": 3.727305276963121, "total_flos": 3556969662959032320, "step": 926500 }, { "loss": 2.8425, "learning_rate": 2.5652246209002685e-06, "epoch": 3.7277075765073158, "total_flos": 3557350824258385920, "step": 926600 }, { "loss": 2.8675, "learning_rate": 2.5644134955043377e-06, "epoch": 3.7281098760515103, "total_flos": 3557740271095633920, "step": 926700 }, { "loss": 2.79, "learning_rate": 2.5636023701084073e-06, "epoch": 3.728512175595705, "total_flos": 3558119392877967360, "step": 926800 }, { "loss": 2.8175, "learning_rate": 2.5627912447124765e-06, "epoch": 3.7289144751398995, "total_flos": 3558502407800862720, "step": 926900 }, { "loss": 2.7975, "learning_rate": 2.561980119316546e-06, "epoch": 3.7293167746840945, "total_flos": 3558879293550213120, "step": 927000 }, { "loss": 2.89, "learning_rate": 2.5611689939206153e-06, "epoch": 3.7297190742282886, "total_flos": 3559259089860311040, "step": 927100 }, { "loss": 2.85, "learning_rate": 2.560357868524685e-06, "epoch": 3.7301213737724837, "total_flos": 3559658601501603840, "step": 927200 }, { "loss": 2.8475, "learning_rate": 2.559546743128754e-06, "epoch": 3.7305236733166782, "total_flos": 3560036910663874560, "step": 927300 }, { "loss": 2.87, "learning_rate": 2.5587356177328238e-06, "epoch": 3.730925972860873, "total_flos": 3560422432493107200, "step": 927400 }, { "loss": 2.875, "learning_rate": 2.557924492336893e-06, "epoch": 3.7313282724050674, "total_flos": 3560828800948131840, "step": 927500 }, { "loss": 2.865, "learning_rate": 2.5571133669409626e-06, "epoch": 3.731730571949262, "total_flos": 3561218380566435840, "step": 927600 }, { "loss": 2.83, "learning_rate": 2.5563022415450318e-06, "epoch": 3.7321328714934565, "total_flos": 3561607232544552960, "step": 927700 }, { "loss": 2.83, "learning_rate": 2.5554911161491014e-06, "epoch": 3.732535171037651, "total_flos": 3561987868030924800, "step": 927800 }, { "loss": 2.8475, "learning_rate": 2.5546799907531706e-06, "epoch": 3.732937470581846, "total_flos": 3562370946688727040, "step": 927900 }, { "loss": 2.8375, "learning_rate": 2.5538688653572402e-06, "epoch": 3.7333397701260402, "total_flos": 3562764430070077440, "step": 928000 }, { "loss": 2.84, "learning_rate": 2.5530577399613094e-06, "epoch": 3.7337420696702353, "total_flos": 3563147641508935680, "step": 928100 }, { "loss": 2.865, "learning_rate": 2.552246614565379e-06, "epoch": 3.73414436921443, "total_flos": 3563537900966246400, "step": 928200 }, { "loss": 2.885, "learning_rate": 2.5514354891694482e-06, "epoch": 3.7345466687586244, "total_flos": 3563915052277708800, "step": 928300 }, { "loss": 2.8075, "learning_rate": 2.550624363773518e-06, "epoch": 3.734948968302819, "total_flos": 3564298247782840320, "step": 928400 }, { "loss": 2.81, "learning_rate": 2.549813238377587e-06, "epoch": 3.7353512678470135, "total_flos": 3564677061513123840, "step": 928500 }, { "loss": 2.8375, "learning_rate": 2.5490021129816567e-06, "epoch": 3.735753567391208, "total_flos": 3565063305671301120, "step": 928600 }, { "loss": 2.87, "learning_rate": 2.548190987585726e-06, "epoch": 3.7361558669354027, "total_flos": 3565446023164631040, "step": 928700 }, { "loss": 2.7825, "learning_rate": 2.5473798621897955e-06, "epoch": 3.7365581664795973, "total_flos": 3565842040008529920, "step": 928800 }, { "loss": 2.855, "learning_rate": 2.5465687367938647e-06, "epoch": 3.736960466023792, "total_flos": 3566225384228444160, "step": 928900 }, { "loss": 2.865, "learning_rate": 2.5457576113979343e-06, "epoch": 3.737362765567987, "total_flos": 3566618458644142080, "step": 929000 }, { "loss": 2.8475, "learning_rate": 2.5449464860020035e-06, "epoch": 3.737765065112181, "total_flos": 3566994813269268480, "step": 929100 }, { "loss": 2.885, "learning_rate": 2.544135360606073e-06, "epoch": 3.738167364656376, "total_flos": 3567367003880478720, "step": 929200 }, { "loss": 2.8425, "learning_rate": 2.5433242352101423e-06, "epoch": 3.7385696642005706, "total_flos": 3567748547589273600, "step": 929300 }, { "loss": 2.8525, "learning_rate": 2.542513109814212e-06, "epoch": 3.738971963744765, "total_flos": 3568137797910558720, "step": 929400 }, { "loss": 2.81, "learning_rate": 2.541701984418281e-06, "epoch": 3.7393742632889597, "total_flos": 3568518640535377920, "step": 929500 }, { "loss": 2.81, "learning_rate": 2.5408908590223507e-06, "epoch": 3.7397765628331543, "total_flos": 3568900672878458880, "step": 929600 }, { "loss": 2.8675, "learning_rate": 2.54007973362642e-06, "epoch": 3.740178862377349, "total_flos": 3569281680151787520, "step": 929700 }, { "loss": 2.8775, "learning_rate": 2.5392686082304896e-06, "epoch": 3.7405811619215434, "total_flos": 3569668917512263680, "step": 929800 }, { "loss": 2.8425, "learning_rate": 2.538457482834559e-06, "epoch": 3.740983461465738, "total_flos": 3570066511795107840, "step": 929900 }, { "loss": 2.8175, "learning_rate": 2.5376463574386284e-06, "epoch": 3.7413857610099326, "total_flos": 3570456096724654080, "step": 930000 }, { "loss": 2.8025, "learning_rate": 2.536835232042698e-06, "epoch": 3.7417880605541276, "total_flos": 3570832716911892480, "step": 930100 }, { "loss": 2.8475, "learning_rate": 2.536024106646767e-06, "epoch": 3.7421903600983217, "total_flos": 3571211466907269120, "step": 930200 }, { "loss": 2.825, "learning_rate": 2.535212981250837e-06, "epoch": 3.7425926596425168, "total_flos": 3571603303803525120, "step": 930300 }, { "loss": 2.86, "learning_rate": 2.534401855854906e-06, "epoch": 3.7429949591867113, "total_flos": 3571984709420021760, "step": 930400 }, { "loss": 2.795, "learning_rate": 2.5335907304589756e-06, "epoch": 3.743397258730906, "total_flos": 3572364548220057600, "step": 930500 }, { "loss": 2.82, "learning_rate": 2.532779605063045e-06, "epoch": 3.7437995582751005, "total_flos": 3572737896682076160, "step": 930600 }, { "loss": 2.835, "learning_rate": 2.5319684796671144e-06, "epoch": 3.744201857819295, "total_flos": 3573127826842368000, "step": 930700 }, { "loss": 2.8425, "learning_rate": 2.5311573542711836e-06, "epoch": 3.7446041573634896, "total_flos": 3573502577472337920, "step": 930800 }, { "loss": 2.855, "learning_rate": 2.5303462288752533e-06, "epoch": 3.745006456907684, "total_flos": 3573891408205486080, "step": 930900 }, { "loss": 2.85, "learning_rate": 2.5295351034793224e-06, "epoch": 3.745408756451879, "total_flos": 3574278475606210560, "step": 931000 }, { "loss": 2.8425, "learning_rate": 2.528723978083392e-06, "epoch": 3.7458110559960733, "total_flos": 3574667056710973440, "step": 931100 }, { "loss": 2.8625, "learning_rate": 2.5279128526874613e-06, "epoch": 3.7462133555402684, "total_flos": 3575051760608901120, "step": 931200 }, { "loss": 2.835, "learning_rate": 2.527101727291531e-06, "epoch": 3.746615655084463, "total_flos": 3575433612369745920, "step": 931300 }, { "loss": 2.875, "learning_rate": 2.5262906018956e-06, "epoch": 3.7470179546286575, "total_flos": 3575824971254200320, "step": 931400 }, { "loss": 2.825, "learning_rate": 2.5254794764996697e-06, "epoch": 3.747420254172852, "total_flos": 3576232343534008320, "step": 931500 }, { "loss": 2.8675, "learning_rate": 2.524668351103739e-06, "epoch": 3.7478225537170466, "total_flos": 3576599392862730240, "step": 931600 }, { "loss": 2.87, "learning_rate": 2.5238572257078085e-06, "epoch": 3.748224853261241, "total_flos": 3577002813578311680, "step": 931700 }, { "loss": 2.815, "learning_rate": 2.5230461003118777e-06, "epoch": 3.748627152805436, "total_flos": 3577389997826365440, "step": 931800 }, { "loss": 2.7925, "learning_rate": 2.5222349749159473e-06, "epoch": 3.7490294523496304, "total_flos": 3577767706818263040, "step": 931900 }, { "loss": 2.7975, "learning_rate": 2.5214238495200165e-06, "epoch": 3.749431751893825, "total_flos": 3578173411368007680, "step": 932000 }, { "loss": 2.8225, "learning_rate": 2.520612724124086e-06, "epoch": 3.74983405143802, "total_flos": 3578547450291517440, "step": 932100 }, { "loss": 2.7825, "learning_rate": 2.5198015987281553e-06, "epoch": 3.750236350982214, "total_flos": 3578925377044346880, "step": 932200 }, { "loss": 2.81, "learning_rate": 2.518990473332225e-06, "epoch": 3.750638650526409, "total_flos": 3579307696194508800, "step": 932300 }, { "loss": 2.7825, "learning_rate": 2.518179347936294e-06, "epoch": 3.7510409500706037, "total_flos": 3579693212712499200, "step": 932400 }, { "loss": 2.8425, "learning_rate": 2.5173682225403638e-06, "epoch": 3.7514432496147982, "total_flos": 3580082388676392960, "step": 932500 }, { "loss": 2.8425, "learning_rate": 2.516557097144433e-06, "epoch": 3.751845549158993, "total_flos": 3580462211542702080, "step": 932600 }, { "loss": 2.8975, "learning_rate": 2.5157459717485026e-06, "epoch": 3.7522478487031874, "total_flos": 3580850208410818560, "step": 932700 }, { "loss": 2.85, "learning_rate": 2.5149348463525718e-06, "epoch": 3.752650148247382, "total_flos": 3581240998992353280, "step": 932800 }, { "loss": 2.8025, "learning_rate": 2.5141237209566414e-06, "epoch": 3.7530524477915765, "total_flos": 3581637568205445120, "step": 932900 }, { "loss": 2.8225, "learning_rate": 2.5133125955607106e-06, "epoch": 3.753454747335771, "total_flos": 3582037664083384320, "step": 933000 }, { "loss": 2.8575, "learning_rate": 2.5125014701647802e-06, "epoch": 3.7538570468799657, "total_flos": 3582413646921553920, "step": 933100 }, { "loss": 2.8425, "learning_rate": 2.5116903447688494e-06, "epoch": 3.7542593464241607, "total_flos": 3582802562634577920, "step": 933200 }, { "loss": 2.835, "learning_rate": 2.510879219372919e-06, "epoch": 3.7546616459683553, "total_flos": 3583182603261818880, "step": 933300 }, { "loss": 2.875, "learning_rate": 2.5100680939769882e-06, "epoch": 3.75506394551255, "total_flos": 3583570111495649280, "step": 933400 }, { "loss": 2.865, "learning_rate": 2.509256968581058e-06, "epoch": 3.7554662450567444, "total_flos": 3583953944349849600, "step": 933500 }, { "loss": 2.8825, "learning_rate": 2.508445843185127e-06, "epoch": 3.755868544600939, "total_flos": 3584336088229017600, "step": 933600 }, { "loss": 2.8225, "learning_rate": 2.5076347177891967e-06, "epoch": 3.7562708441451336, "total_flos": 3584723931071109120, "step": 933700 }, { "loss": 2.8225, "learning_rate": 2.506823592393266e-06, "epoch": 3.756673143689328, "total_flos": 3585126650702714880, "step": 933800 }, { "loss": 2.8275, "learning_rate": 2.5060124669973355e-06, "epoch": 3.7570754432335227, "total_flos": 3585542515658864640, "step": 933900 }, { "loss": 2.7975, "learning_rate": 2.5052013416014047e-06, "epoch": 3.7574777427777173, "total_flos": 3585918503808276480, "step": 934000 }, { "loss": 2.8025, "learning_rate": 2.5043902162054747e-06, "epoch": 3.7578800423219123, "total_flos": 3586300849514649600, "step": 934100 }, { "loss": 2.835, "learning_rate": 2.5035790908095435e-06, "epoch": 3.7582823418661064, "total_flos": 3586682834056550400, "step": 934200 }, { "loss": 2.835, "learning_rate": 2.5027679654136135e-06, "epoch": 3.7586846414103015, "total_flos": 3587087120504616960, "step": 934300 }, { "loss": 2.855, "learning_rate": 2.5019568400176823e-06, "epoch": 3.759086940954496, "total_flos": 3587479345121556480, "step": 934400 }, { "loss": 2.8375, "learning_rate": 2.5011457146217523e-06, "epoch": 3.7594892404986906, "total_flos": 3587861446510786560, "step": 934500 }, { "loss": 2.79, "learning_rate": 2.500334589225822e-06, "epoch": 3.759891540042885, "total_flos": 3588271065446062080, "step": 934600 }, { "loss": 2.7775, "learning_rate": 2.499523463829891e-06, "epoch": 3.7602938395870797, "total_flos": 3588658361230202880, "step": 934700 }, { "loss": 2.87, "learning_rate": 2.4987123384339604e-06, "epoch": 3.7606961391312743, "total_flos": 3589052970594908160, "step": 934800 }, { "loss": 2.8325, "learning_rate": 2.49790121303803e-06, "epoch": 3.761098438675469, "total_flos": 3589455541511731200, "step": 934900 }, { "loss": 2.865, "learning_rate": 2.497090087642099e-06, "epoch": 3.7615007382196635, "total_flos": 3589835449357916160, "step": 935000 }, { "loss": 2.845, "learning_rate": 2.496278962246169e-06, "epoch": 3.761903037763858, "total_flos": 3590211761493104640, "step": 935100 }, { "loss": 2.7925, "learning_rate": 2.495467836850238e-06, "epoch": 3.762305337308053, "total_flos": 3590595573102336000, "step": 935200 }, { "loss": 2.7675, "learning_rate": 2.4946567114543076e-06, "epoch": 3.762707636852247, "total_flos": 3590971842747586560, "step": 935300 }, { "loss": 2.8375, "learning_rate": 2.493845586058377e-06, "epoch": 3.763109936396442, "total_flos": 3591357088392222720, "step": 935400 }, { "loss": 2.7925, "learning_rate": 2.4930344606624464e-06, "epoch": 3.7635122359406368, "total_flos": 3591717902322554880, "step": 935500 }, { "loss": 2.87, "learning_rate": 2.4922233352665156e-06, "epoch": 3.7639145354848313, "total_flos": 3592093141586810880, "step": 935600 }, { "loss": 2.87, "learning_rate": 2.4914122098705852e-06, "epoch": 3.764316835029026, "total_flos": 3592474440978462720, "step": 935700 }, { "loss": 2.755, "learning_rate": 2.4906010844746544e-06, "epoch": 3.7647191345732205, "total_flos": 3592845935816939520, "step": 935800 }, { "loss": 2.7825, "learning_rate": 2.489789959078724e-06, "epoch": 3.765121434117415, "total_flos": 3593236386478970880, "step": 935900 }, { "loss": 2.82, "learning_rate": 2.4889788336827933e-06, "epoch": 3.7655237336616096, "total_flos": 3593627134570567680, "step": 936000 }, { "loss": 2.805, "learning_rate": 2.488167708286863e-06, "epoch": 3.765926033205804, "total_flos": 3594021982941173760, "step": 936100 }, { "loss": 2.81, "learning_rate": 2.487356582890932e-06, "epoch": 3.766328332749999, "total_flos": 3594410271927613440, "step": 936200 }, { "loss": 2.8175, "learning_rate": 2.4865454574950017e-06, "epoch": 3.766730632294194, "total_flos": 3594801917619148800, "step": 936300 }, { "loss": 2.84, "learning_rate": 2.485734332099071e-06, "epoch": 3.7671329318383884, "total_flos": 3595171819084953600, "step": 936400 }, { "loss": 2.8575, "learning_rate": 2.4849232067031405e-06, "epoch": 3.767535231382583, "total_flos": 3595542549104547840, "step": 936500 }, { "loss": 2.85, "learning_rate": 2.4841120813072097e-06, "epoch": 3.7679375309267775, "total_flos": 3595925579961169920, "step": 936600 }, { "loss": 2.795, "learning_rate": 2.4833009559112793e-06, "epoch": 3.768339830470972, "total_flos": 3596332123687188480, "step": 936700 }, { "loss": 2.8225, "learning_rate": 2.4824898305153485e-06, "epoch": 3.7687421300151667, "total_flos": 3596713311542753280, "step": 936800 }, { "loss": 2.8675, "learning_rate": 2.481678705119418e-06, "epoch": 3.7691444295593612, "total_flos": 3597088056861480960, "step": 936900 }, { "loss": 2.7975, "learning_rate": 2.4808675797234878e-06, "epoch": 3.769546729103556, "total_flos": 3597469579325306880, "step": 937000 }, { "loss": 2.8175, "learning_rate": 2.480056454327557e-06, "epoch": 3.7699490286477504, "total_flos": 3597846571299502080, "step": 937100 }, { "loss": 2.8675, "learning_rate": 2.4792453289316266e-06, "epoch": 3.7703513281919454, "total_flos": 3598229803983329280, "step": 937200 }, { "loss": 2.86, "learning_rate": 2.4784342035356958e-06, "epoch": 3.7707536277361395, "total_flos": 3598620876060702720, "step": 937300 }, { "loss": 2.7925, "learning_rate": 2.4776230781397654e-06, "epoch": 3.7711559272803346, "total_flos": 3599002786245212160, "step": 937400 }, { "loss": 2.83, "learning_rate": 2.4768119527438346e-06, "epoch": 3.771558226824529, "total_flos": 3599384924813137920, "step": 937500 }, { "loss": 2.88, "learning_rate": 2.476000827347904e-06, "epoch": 3.7719605263687237, "total_flos": 3599770499754792960, "step": 937600 }, { "loss": 2.82, "learning_rate": 2.4751897019519734e-06, "epoch": 3.7723628259129183, "total_flos": 3600151151174891520, "step": 937700 }, { "loss": 2.85, "learning_rate": 2.474378576556043e-06, "epoch": 3.772765125457113, "total_flos": 3600556250243020800, "step": 937800 }, { "loss": 2.825, "learning_rate": 2.473567451160112e-06, "epoch": 3.7731674250013074, "total_flos": 3600935345469143040, "step": 937900 }, { "loss": 2.8125, "learning_rate": 2.472756325764182e-06, "epoch": 3.773569724545502, "total_flos": 3601310239502653440, "step": 938000 }, { "loss": 2.8, "learning_rate": 2.471945200368251e-06, "epoch": 3.7739720240896966, "total_flos": 3601681861810944000, "step": 938100 }, { "loss": 2.875, "learning_rate": 2.4711340749723206e-06, "epoch": 3.774374323633891, "total_flos": 3602055714840975360, "step": 938200 }, { "loss": 2.83, "learning_rate": 2.47032294957639e-06, "epoch": 3.774776623178086, "total_flos": 3602445841517230080, "step": 938300 }, { "loss": 2.89, "learning_rate": 2.4695118241804595e-06, "epoch": 3.7751789227222803, "total_flos": 3602833503777085440, "step": 938400 }, { "loss": 2.7975, "learning_rate": 2.4687006987845287e-06, "epoch": 3.7755812222664753, "total_flos": 3603229355972474880, "step": 938500 }, { "loss": 2.8675, "learning_rate": 2.4678895733885983e-06, "epoch": 3.77598352181067, "total_flos": 3603595879488215040, "step": 938600 }, { "loss": 2.7925, "learning_rate": 2.4670784479926675e-06, "epoch": 3.7763858213548644, "total_flos": 3603978883788625920, "step": 938700 }, { "loss": 2.78, "learning_rate": 2.466267322596737e-06, "epoch": 3.776788120899059, "total_flos": 3604368952041216000, "step": 938800 }, { "loss": 2.8575, "learning_rate": 2.4654561972008063e-06, "epoch": 3.7771904204432536, "total_flos": 3604736559050373120, "step": 938900 }, { "loss": 2.785, "learning_rate": 2.464645071804876e-06, "epoch": 3.777592719987448, "total_flos": 3605094637690951680, "step": 939000 }, { "loss": 2.765, "learning_rate": 2.463833946408945e-06, "epoch": 3.7779950195316427, "total_flos": 3605469876955207680, "step": 939100 }, { "loss": 2.795, "learning_rate": 2.4630228210130147e-06, "epoch": 3.7783973190758378, "total_flos": 3605833601446287360, "step": 939200 }, { "loss": 2.825, "learning_rate": 2.462211695617084e-06, "epoch": 3.778799618620032, "total_flos": 3606236926559508480, "step": 939300 }, { "loss": 2.825, "learning_rate": 2.4614005702211535e-06, "epoch": 3.779201918164227, "total_flos": 3606626033477253120, "step": 939400 }, { "loss": 2.8175, "learning_rate": 2.4605894448252227e-06, "epoch": 3.7796042177084215, "total_flos": 3607028460990535680, "step": 939500 }, { "loss": 2.8925, "learning_rate": 2.4597783194292923e-06, "epoch": 3.780006517252616, "total_flos": 3607417615709460480, "step": 939600 }, { "loss": 2.86, "learning_rate": 2.458967194033362e-06, "epoch": 3.7804088167968106, "total_flos": 3607796132010178560, "step": 939700 }, { "loss": 2.8175, "learning_rate": 2.458156068637431e-06, "epoch": 3.780811116341005, "total_flos": 3608176045167605760, "step": 939800 }, { "loss": 2.895, "learning_rate": 2.4573449432415008e-06, "epoch": 3.7812134158851998, "total_flos": 3608551199451985920, "step": 939900 }, { "loss": 2.8125, "learning_rate": 2.45653381784557e-06, "epoch": 3.7816157154293943, "total_flos": 3608930544306493440, "step": 940000 }, { "loss": 2.8725, "learning_rate": 2.4557226924496396e-06, "epoch": 3.782018014973589, "total_flos": 3609309676711311360, "step": 940100 }, { "loss": 2.8725, "learning_rate": 2.454911567053709e-06, "epoch": 3.7824203145177835, "total_flos": 3609705725422663680, "step": 940200 }, { "loss": 2.7975, "learning_rate": 2.4541004416577784e-06, "epoch": 3.7828226140619785, "total_flos": 3610091741197424640, "step": 940300 }, { "loss": 2.8225, "learning_rate": 2.4532893162618476e-06, "epoch": 3.7832249136061726, "total_flos": 3610491146613872640, "step": 940400 }, { "loss": 2.855, "learning_rate": 2.4524781908659172e-06, "epoch": 3.7836272131503677, "total_flos": 3610870725163038720, "step": 940500 }, { "loss": 2.79, "learning_rate": 2.4516670654699864e-06, "epoch": 3.7840295126945622, "total_flos": 3611258467091527680, "step": 940600 }, { "loss": 2.8175, "learning_rate": 2.450855940074056e-06, "epoch": 3.784431812238757, "total_flos": 3611641439524485120, "step": 940700 }, { "loss": 2.7925, "learning_rate": 2.4500448146781252e-06, "epoch": 3.7848341117829514, "total_flos": 3612022075010856960, "step": 940800 }, { "loss": 2.8275, "learning_rate": 2.449233689282195e-06, "epoch": 3.785236411327146, "total_flos": 3612403241621452800, "step": 940900 }, { "loss": 2.815, "learning_rate": 2.448422563886264e-06, "epoch": 3.7856387108713405, "total_flos": 3612765107177748480, "step": 941000 }, { "loss": 2.81, "learning_rate": 2.4476114384903337e-06, "epoch": 3.786041010415535, "total_flos": 3613139762205358080, "step": 941100 }, { "loss": 2.875, "learning_rate": 2.446800313094403e-06, "epoch": 3.7864433099597297, "total_flos": 3613532114292111360, "step": 941200 }, { "loss": 2.855, "learning_rate": 2.4459891876984725e-06, "epoch": 3.7868456095039242, "total_flos": 3613927467230730240, "step": 941300 }, { "loss": 2.84, "learning_rate": 2.4451780623025417e-06, "epoch": 3.7872479090481193, "total_flos": 3614303917458216960, "step": 941400 }, { "loss": 2.8025, "learning_rate": 2.4443669369066113e-06, "epoch": 3.787650208592314, "total_flos": 3614673500249487360, "step": 941500 }, { "loss": 2.8125, "learning_rate": 2.443555811510681e-06, "epoch": 3.7880525081365084, "total_flos": 3615055681307351040, "step": 941600 }, { "loss": 2.86, "learning_rate": 2.44274468611475e-06, "epoch": 3.788454807680703, "total_flos": 3615464354841507840, "step": 941700 }, { "loss": 2.825, "learning_rate": 2.4419335607188197e-06, "epoch": 3.7888571072248975, "total_flos": 3615827075507804160, "step": 941800 }, { "loss": 2.845, "learning_rate": 2.441122435322889e-06, "epoch": 3.789259406769092, "total_flos": 3616201629621811200, "step": 941900 }, { "loss": 2.825, "learning_rate": 2.4403113099269586e-06, "epoch": 3.7896617063132867, "total_flos": 3616599085812357120, "step": 942000 }, { "loss": 2.86, "learning_rate": 2.4395001845310278e-06, "epoch": 3.7900640058574813, "total_flos": 3616979721298728960, "step": 942100 }, { "loss": 2.865, "learning_rate": 2.4386890591350974e-06, "epoch": 3.790466305401676, "total_flos": 3617349203176396800, "step": 942200 }, { "loss": 2.9, "learning_rate": 2.4378779337391666e-06, "epoch": 3.790868604945871, "total_flos": 3617729280982333440, "step": 942300 }, { "loss": 2.805, "learning_rate": 2.437066808343236e-06, "epoch": 3.791270904490065, "total_flos": 3618105640918702080, "step": 942400 }, { "loss": 2.8925, "learning_rate": 2.4362556829473054e-06, "epoch": 3.79167320403426, "total_flos": 3618481756537927680, "step": 942500 }, { "loss": 2.8975, "learning_rate": 2.435444557551375e-06, "epoch": 3.7920755035784546, "total_flos": 3618861127948646400, "step": 942600 }, { "loss": 2.81, "learning_rate": 2.434633432155444e-06, "epoch": 3.792477803122649, "total_flos": 3619243351496448000, "step": 942700 }, { "loss": 2.8775, "learning_rate": 2.433822306759514e-06, "epoch": 3.7928801026668437, "total_flos": 3619630997822576640, "step": 942800 }, { "loss": 2.7975, "learning_rate": 2.433011181363583e-06, "epoch": 3.7932824022110383, "total_flos": 3620007926061864960, "step": 942900 }, { "loss": 2.8, "learning_rate": 2.4322000559676526e-06, "epoch": 3.793684701755233, "total_flos": 3620358531784611840, "step": 943000 }, { "loss": 2.875, "learning_rate": 2.431388930571722e-06, "epoch": 3.7940870012994274, "total_flos": 3620778539509708800, "step": 943100 }, { "loss": 2.7975, "learning_rate": 2.4305778051757914e-06, "epoch": 3.794489300843622, "total_flos": 3621159971682416640, "step": 943200 }, { "loss": 2.8475, "learning_rate": 2.4297666797798606e-06, "epoch": 3.7948916003878166, "total_flos": 3621549864664012800, "step": 943300 }, { "loss": 2.84, "learning_rate": 2.4289555543839303e-06, "epoch": 3.7952938999320116, "total_flos": 3621925311066716160, "step": 943400 }, { "loss": 2.8775, "learning_rate": 2.4281444289879995e-06, "epoch": 3.7956961994762057, "total_flos": 3622319899186452480, "step": 943500 }, { "loss": 2.85, "learning_rate": 2.427333303592069e-06, "epoch": 3.7960984990204008, "total_flos": 3622686380212254720, "step": 943600 }, { "loss": 2.85, "learning_rate": 2.4265221781961383e-06, "epoch": 3.7965007985645953, "total_flos": 3623077956857640960, "step": 943700 }, { "loss": 2.8275, "learning_rate": 2.425711052800208e-06, "epoch": 3.79690309810879, "total_flos": 3623484165975398400, "step": 943800 }, { "loss": 2.81, "learning_rate": 2.424899927404277e-06, "epoch": 3.7973053976529845, "total_flos": 3623854619810396160, "step": 943900 }, { "loss": 2.7975, "learning_rate": 2.4240888020083467e-06, "epoch": 3.797707697197179, "total_flos": 3624241819992176640, "step": 944000 }, { "loss": 2.8325, "learning_rate": 2.423277676612416e-06, "epoch": 3.7981099967413736, "total_flos": 3624612889931274240, "step": 944100 }, { "loss": 2.8225, "learning_rate": 2.4224665512164855e-06, "epoch": 3.798512296285568, "total_flos": 3625005592560015360, "step": 944200 }, { "loss": 2.8325, "learning_rate": 2.4216554258205547e-06, "epoch": 3.7989145958297628, "total_flos": 3625388634039121920, "step": 944300 }, { "loss": 2.9025, "learning_rate": 2.4208443004246243e-06, "epoch": 3.7993168953739573, "total_flos": 3625781326045378560, "step": 944400 }, { "loss": 2.8325, "learning_rate": 2.4200331750286935e-06, "epoch": 3.7997191949181524, "total_flos": 3626185745274501120, "step": 944500 }, { "loss": 2.8975, "learning_rate": 2.419222049632763e-06, "epoch": 3.800121494462347, "total_flos": 3626571075899013120, "step": 944600 }, { "loss": 2.8725, "learning_rate": 2.4184109242368323e-06, "epoch": 3.8005237940065415, "total_flos": 3626947812933580800, "step": 944700 }, { "loss": 2.845, "learning_rate": 2.417599798840902e-06, "epoch": 3.800926093550736, "total_flos": 3627342135491205120, "step": 944800 }, { "loss": 2.82, "learning_rate": 2.416788673444971e-06, "epoch": 3.8013283930949306, "total_flos": 3627748472078776320, "step": 944900 }, { "loss": 2.88, "learning_rate": 2.4159775480490408e-06, "epoch": 3.801730692639125, "total_flos": 3628146905537894400, "step": 945000 }, { "loss": 2.815, "learning_rate": 2.41516642265311e-06, "epoch": 3.80213299218332, "total_flos": 3628525087230351360, "step": 945100 }, { "loss": 2.885, "learning_rate": 2.4143552972571796e-06, "epoch": 3.8025352917275144, "total_flos": 3628923435709593600, "step": 945200 }, { "loss": 2.87, "learning_rate": 2.4135441718612492e-06, "epoch": 3.802937591271709, "total_flos": 3629315166381004800, "step": 945300 }, { "loss": 2.795, "learning_rate": 2.4127330464653184e-06, "epoch": 3.803339890815904, "total_flos": 3629709111840430080, "step": 945400 }, { "loss": 2.8175, "learning_rate": 2.411921921069388e-06, "epoch": 3.803742190360098, "total_flos": 3630090751151585280, "step": 945500 }, { "loss": 2.7875, "learning_rate": 2.4111107956734572e-06, "epoch": 3.804144489904293, "total_flos": 3630471519419013120, "step": 945600 }, { "loss": 2.7775, "learning_rate": 2.410299670277527e-06, "epoch": 3.8045467894484877, "total_flos": 3630858300012656640, "step": 945700 }, { "loss": 2.8625, "learning_rate": 2.409488544881596e-06, "epoch": 3.8049490889926822, "total_flos": 3631238457487226880, "step": 945800 }, { "loss": 2.835, "learning_rate": 2.4086774194856657e-06, "epoch": 3.805351388536877, "total_flos": 3631628286733916160, "step": 945900 }, { "loss": 2.835, "learning_rate": 2.407866294089735e-06, "epoch": 3.8057536880810714, "total_flos": 3632019422546196480, "step": 946000 }, { "loss": 2.8775, "learning_rate": 2.4070551686938045e-06, "epoch": 3.806155987625266, "total_flos": 3632415864289474560, "step": 946100 }, { "loss": 2.8475, "learning_rate": 2.4062440432978737e-06, "epoch": 3.8065582871694605, "total_flos": 3632803887713802240, "step": 946200 }, { "loss": 2.8275, "learning_rate": 2.4054329179019433e-06, "epoch": 3.806960586713655, "total_flos": 3633192123587819520, "step": 946300 }, { "loss": 2.77, "learning_rate": 2.404621792506013e-06, "epoch": 3.8073628862578497, "total_flos": 3633566475874621440, "step": 946400 }, { "loss": 2.7775, "learning_rate": 2.403810667110082e-06, "epoch": 3.8077651858020447, "total_flos": 3633954839218452480, "step": 946500 }, { "loss": 2.865, "learning_rate": 2.4029995417141517e-06, "epoch": 3.808167485346239, "total_flos": 3634347690561976320, "step": 946600 }, { "loss": 2.8475, "learning_rate": 2.402188416318221e-06, "epoch": 3.808569784890434, "total_flos": 3634723652155176960, "step": 946700 }, { "loss": 2.845, "learning_rate": 2.4013772909222905e-06, "epoch": 3.8089720844346284, "total_flos": 3635125893774981120, "step": 946800 }, { "loss": 2.845, "learning_rate": 2.4005661655263597e-06, "epoch": 3.809374383978823, "total_flos": 3635522691371489280, "step": 946900 }, { "loss": 2.7875, "learning_rate": 2.3997550401304294e-06, "epoch": 3.8097766835230176, "total_flos": 3635907782990100480, "step": 947000 }, { "loss": 2.8425, "learning_rate": 2.3989439147344986e-06, "epoch": 3.810178983067212, "total_flos": 3636298127427287040, "step": 947100 }, { "loss": 2.8575, "learning_rate": 2.398132789338568e-06, "epoch": 3.8105812826114067, "total_flos": 3636683001284966400, "step": 947200 }, { "loss": 2.8125, "learning_rate": 2.3973216639426374e-06, "epoch": 3.8109835821556013, "total_flos": 3637055175962449920, "step": 947300 }, { "loss": 2.8175, "learning_rate": 2.396510538546707e-06, "epoch": 3.8113858816997963, "total_flos": 3637437542913792000, "step": 947400 }, { "loss": 2.8325, "learning_rate": 2.395699413150776e-06, "epoch": 3.8117881812439904, "total_flos": 3637818736080599040, "step": 947500 }, { "loss": 2.8125, "learning_rate": 2.394888287754846e-06, "epoch": 3.8121904807881855, "total_flos": 3638201405772748800, "step": 947600 }, { "loss": 2.825, "learning_rate": 2.394077162358915e-06, "epoch": 3.81259278033238, "total_flos": 3638580617846200320, "step": 947700 }, { "loss": 2.8425, "learning_rate": 2.3932660369629846e-06, "epoch": 3.8129950798765746, "total_flos": 3638960929346795520, "step": 947800 }, { "loss": 2.8, "learning_rate": 2.392454911567054e-06, "epoch": 3.813397379420769, "total_flos": 3639345362371368960, "step": 947900 }, { "loss": 2.82, "learning_rate": 2.3916437861711234e-06, "epoch": 3.8137996789649637, "total_flos": 3639720091756369920, "step": 948000 }, { "loss": 2.8025, "learning_rate": 2.3908326607751926e-06, "epoch": 3.8142019785091583, "total_flos": 3640106017240012800, "step": 948100 }, { "loss": 2.89, "learning_rate": 2.3900215353792623e-06, "epoch": 3.814604278053353, "total_flos": 3640488607263528960, "step": 948200 }, { "loss": 2.8125, "learning_rate": 2.3892104099833314e-06, "epoch": 3.8150065775975475, "total_flos": 3640866358745364480, "step": 948300 }, { "loss": 2.825, "learning_rate": 2.388399284587401e-06, "epoch": 3.815408877141742, "total_flos": 3641229706138245120, "step": 948400 }, { "loss": 2.85, "learning_rate": 2.3875881591914703e-06, "epoch": 3.815811176685937, "total_flos": 3641613597416110080, "step": 948500 }, { "loss": 2.825, "learning_rate": 2.38677703379554e-06, "epoch": 3.816213476230131, "total_flos": 3641998200400435200, "step": 948600 }, { "loss": 2.86, "learning_rate": 2.385965908399609e-06, "epoch": 3.816615775774326, "total_flos": 3642373221903759360, "step": 948700 }, { "loss": 2.82, "learning_rate": 2.3851547830036787e-06, "epoch": 3.8170180753185208, "total_flos": 3642771565071759360, "step": 948800 }, { "loss": 2.8075, "learning_rate": 2.384343657607748e-06, "epoch": 3.8174203748627153, "total_flos": 3643157984500930560, "step": 948900 }, { "loss": 2.8325, "learning_rate": 2.3835325322118175e-06, "epoch": 3.81782267440691, "total_flos": 3643530244158289920, "step": 949000 }, { "loss": 2.8125, "learning_rate": 2.3827214068158867e-06, "epoch": 3.8182249739511045, "total_flos": 3643914092946216960, "step": 949100 }, { "loss": 2.8725, "learning_rate": 2.3819102814199563e-06, "epoch": 3.818627273495299, "total_flos": 3644285046037985280, "step": 949200 }, { "loss": 2.835, "learning_rate": 2.3810991560240255e-06, "epoch": 3.8190295730394936, "total_flos": 3644665777126717440, "step": 949300 }, { "loss": 2.8025, "learning_rate": 2.380288030628095e-06, "epoch": 3.819431872583688, "total_flos": 3645058495689185280, "step": 949400 }, { "loss": 2.8275, "learning_rate": 2.3794769052321643e-06, "epoch": 3.819834172127883, "total_flos": 3645459011155261440, "step": 949500 }, { "loss": 2.8375, "learning_rate": 2.378665779836234e-06, "epoch": 3.820236471672078, "total_flos": 3645835328601692160, "step": 949600 }, { "loss": 2.8625, "learning_rate": 2.377854654440303e-06, "epoch": 3.8206387712162724, "total_flos": 3646223362648504320, "step": 949700 }, { "loss": 2.855, "learning_rate": 2.3770435290443728e-06, "epoch": 3.821041070760467, "total_flos": 3646594650348533760, "step": 949800 }, { "loss": 2.8075, "learning_rate": 2.376232403648442e-06, "epoch": 3.8214433703046615, "total_flos": 3646966341702973440, "step": 949900 }, { "loss": 2.865, "learning_rate": 2.3754212782525116e-06, "epoch": 3.821845669848856, "total_flos": 3647356000989911040, "step": 950000 }, { "loss": 2.835, "learning_rate": 2.3746101528565808e-06, "epoch": 3.8222479693930507, "total_flos": 3647733200102553600, "step": 950100 }, { "loss": 2.8225, "learning_rate": 2.3737990274606504e-06, "epoch": 3.8226502689372452, "total_flos": 3648112433420974080, "step": 950200 }, { "loss": 2.8525, "learning_rate": 2.3729879020647196e-06, "epoch": 3.82305256848144, "total_flos": 3648487173428459520, "step": 950300 }, { "loss": 2.81, "learning_rate": 2.3721767766687892e-06, "epoch": 3.8234548680256344, "total_flos": 3648847583704381440, "step": 950400 }, { "loss": 2.87, "learning_rate": 2.3713656512728584e-06, "epoch": 3.8238571675698294, "total_flos": 3649222366201804800, "step": 950500 }, { "loss": 2.8425, "learning_rate": 2.370554525876928e-06, "epoch": 3.8242594671140235, "total_flos": 3649596245788047360, "step": 950600 }, { "loss": 2.8275, "learning_rate": 2.3697434004809972e-06, "epoch": 3.8246617666582186, "total_flos": 3649983499082250240, "step": 950700 }, { "loss": 2.7825, "learning_rate": 2.368932275085067e-06, "epoch": 3.825064066202413, "total_flos": 3650366928282040320, "step": 950800 }, { "loss": 2.7775, "learning_rate": 2.368121149689136e-06, "epoch": 3.8254663657466077, "total_flos": 3650748854400276480, "step": 950900 }, { "loss": 2.78, "learning_rate": 2.3673100242932057e-06, "epoch": 3.8258686652908023, "total_flos": 3651129367728076800, "step": 951000 }, { "loss": 2.8425, "learning_rate": 2.3664988988972753e-06, "epoch": 3.826270964834997, "total_flos": 3651518017878988800, "step": 951100 }, { "loss": 2.8225, "learning_rate": 2.365687773501345e-06, "epoch": 3.8266732643791914, "total_flos": 3651915904280156160, "step": 951200 }, { "loss": 2.9375, "learning_rate": 2.364876648105414e-06, "epoch": 3.827075563923386, "total_flos": 3652291573755033600, "step": 951300 }, { "loss": 2.825, "learning_rate": 2.3640655227094837e-06, "epoch": 3.8274778634675806, "total_flos": 3652661103433881600, "step": 951400 }, { "loss": 2.83, "learning_rate": 2.363254397313553e-06, "epoch": 3.827880163011775, "total_flos": 3653041903568762880, "step": 951500 }, { "loss": 2.825, "learning_rate": 2.3624432719176225e-06, "epoch": 3.82828246255597, "total_flos": 3653414110113699840, "step": 951600 }, { "loss": 2.8225, "learning_rate": 2.3616321465216917e-06, "epoch": 3.8286847621001643, "total_flos": 3653799780657715200, "step": 951700 }, { "loss": 2.845, "learning_rate": 2.3608210211257613e-06, "epoch": 3.8290870616443593, "total_flos": 3654177712721786880, "step": 951800 }, { "loss": 2.8275, "learning_rate": 2.3600098957298305e-06, "epoch": 3.829489361188554, "total_flos": 3654584702592153600, "step": 951900 }, { "loss": 2.84, "learning_rate": 2.3591987703339e-06, "epoch": 3.8298916607327484, "total_flos": 3654962884284610560, "step": 952000 }, { "loss": 2.8325, "learning_rate": 2.3583876449379694e-06, "epoch": 3.830293960276943, "total_flos": 3655361434591057920, "step": 952100 }, { "loss": 2.91, "learning_rate": 2.357576519542039e-06, "epoch": 3.8306962598211376, "total_flos": 3655736604809164800, "step": 952200 }, { "loss": 2.8625, "learning_rate": 2.356765394146108e-06, "epoch": 3.831098559365332, "total_flos": 3656124516697405440, "step": 952300 }, { "loss": 2.8525, "learning_rate": 2.355954268750178e-06, "epoch": 3.8315008589095267, "total_flos": 3656506193187256320, "step": 952400 }, { "loss": 2.8725, "learning_rate": 2.355143143354247e-06, "epoch": 3.8319031584537213, "total_flos": 3656891603480401920, "step": 952500 }, { "loss": 2.835, "learning_rate": 2.3543320179583166e-06, "epoch": 3.832305457997916, "total_flos": 3657261287185274880, "step": 952600 }, { "loss": 2.8425, "learning_rate": 2.353520892562386e-06, "epoch": 3.832707757542111, "total_flos": 3657645348422891520, "step": 952700 }, { "loss": 2.8025, "learning_rate": 2.3527097671664554e-06, "epoch": 3.8331100570863055, "total_flos": 3658018213561866240, "step": 952800 }, { "loss": 2.8925, "learning_rate": 2.3518986417705246e-06, "epoch": 3.8335123566305, "total_flos": 3658404096555571200, "step": 952900 }, { "loss": 2.78, "learning_rate": 2.3510875163745942e-06, "epoch": 3.8339146561746946, "total_flos": 3658800469252700160, "step": 953000 }, { "loss": 2.805, "learning_rate": 2.3502763909786634e-06, "epoch": 3.834316955718889, "total_flos": 3659173451239004160, "step": 953100 }, { "loss": 2.855, "learning_rate": 2.349465265582733e-06, "epoch": 3.8347192552630838, "total_flos": 3659563694762588160, "step": 953200 }, { "loss": 2.8025, "learning_rate": 2.3486541401868023e-06, "epoch": 3.8351215548072783, "total_flos": 3659940453042124800, "step": 953300 }, { "loss": 2.9, "learning_rate": 2.347843014790872e-06, "epoch": 3.835523854351473, "total_flos": 3660323722904647680, "step": 953400 }, { "loss": 2.8325, "learning_rate": 2.347031889394941e-06, "epoch": 3.8359261538956675, "total_flos": 3660694394500577280, "step": 953500 }, { "loss": 2.88, "learning_rate": 2.3462207639990107e-06, "epoch": 3.8363284534398625, "total_flos": 3661074514796451840, "step": 953600 }, { "loss": 2.8425, "learning_rate": 2.34540963860308e-06, "epoch": 3.8367307529840566, "total_flos": 3661458963754752000, "step": 953700 }, { "loss": 2.81, "learning_rate": 2.3445985132071495e-06, "epoch": 3.8371330525282517, "total_flos": 3661836141622425600, "step": 953800 }, { "loss": 2.8425, "learning_rate": 2.3437873878112187e-06, "epoch": 3.8375353520724462, "total_flos": 3662222507939174400, "step": 953900 }, { "loss": 2.7725, "learning_rate": 2.3429762624152883e-06, "epoch": 3.837937651616641, "total_flos": 3662612937356236800, "step": 954000 }, { "loss": 2.8975, "learning_rate": 2.3421651370193575e-06, "epoch": 3.8383399511608354, "total_flos": 3663006712855910400, "step": 954100 }, { "loss": 2.855, "learning_rate": 2.341354011623427e-06, "epoch": 3.83874225070503, "total_flos": 3663393376602224640, "step": 954200 }, { "loss": 2.85, "learning_rate": 2.3405428862274963e-06, "epoch": 3.8391445502492245, "total_flos": 3663780518360340480, "step": 954300 }, { "loss": 2.845, "learning_rate": 2.339731760831566e-06, "epoch": 3.839546849793419, "total_flos": 3664180364609894400, "step": 954400 }, { "loss": 2.8725, "learning_rate": 2.338920635435635e-06, "epoch": 3.8399491493376137, "total_flos": 3664561918941173760, "step": 954500 }, { "loss": 2.8075, "learning_rate": 2.3381095100397048e-06, "epoch": 3.8403514488818082, "total_flos": 3664949687425873920, "step": 954600 }, { "loss": 2.83, "learning_rate": 2.337298384643774e-06, "epoch": 3.8407537484260033, "total_flos": 3665333079446968320, "step": 954700 }, { "loss": 2.8175, "learning_rate": 2.3364872592478436e-06, "epoch": 3.8411560479701974, "total_flos": 3665722627197818880, "step": 954800 }, { "loss": 2.845, "learning_rate": 2.3356761338519128e-06, "epoch": 3.8415583475143924, "total_flos": 3666106194489907200, "step": 954900 }, { "loss": 2.8575, "learning_rate": 2.3348650084559824e-06, "epoch": 3.841960647058587, "total_flos": 3666489613067212800, "step": 955000 }, { "loss": 2.835, "learning_rate": 2.3340538830600516e-06, "epoch": 3.8423629466027815, "total_flos": 3666884827913533440, "step": 955100 }, { "loss": 2.84, "learning_rate": 2.333242757664121e-06, "epoch": 3.842765246146976, "total_flos": 3667258171064309760, "step": 955200 }, { "loss": 2.845, "learning_rate": 2.3324316322681904e-06, "epoch": 3.8431675456911707, "total_flos": 3667640251208570880, "step": 955300 }, { "loss": 2.8425, "learning_rate": 2.33162050687226e-06, "epoch": 3.8435698452353653, "total_flos": 3668028099361904640, "step": 955400 }, { "loss": 2.79, "learning_rate": 2.3308093814763292e-06, "epoch": 3.84397214477956, "total_flos": 3668417004452444160, "step": 955500 }, { "loss": 2.81, "learning_rate": 2.329998256080399e-06, "epoch": 3.844374444323755, "total_flos": 3668786119854397440, "step": 955600 }, { "loss": 2.8825, "learning_rate": 2.329187130684468e-06, "epoch": 3.844776743867949, "total_flos": 3669161444098529280, "step": 955700 }, { "loss": 2.8675, "learning_rate": 2.328376005288538e-06, "epoch": 3.845179043412144, "total_flos": 3669545871811860480, "step": 955800 }, { "loss": 2.8325, "learning_rate": 2.3275648798926073e-06, "epoch": 3.8455813429563386, "total_flos": 3669934861882275840, "step": 955900 }, { "loss": 2.8575, "learning_rate": 2.326753754496677e-06, "epoch": 3.845983642500533, "total_flos": 3670313144488335360, "step": 956000 }, { "loss": 2.83, "learning_rate": 2.325942629100746e-06, "epoch": 3.8463859420447277, "total_flos": 3670693498478868480, "step": 956100 }, { "loss": 2.8225, "learning_rate": 2.3251315037048157e-06, "epoch": 3.8467882415889223, "total_flos": 3671076800208844800, "step": 956200 }, { "loss": 2.8475, "learning_rate": 2.324320378308885e-06, "epoch": 3.847190541133117, "total_flos": 3671456623075153920, "step": 956300 }, { "loss": 2.8425, "learning_rate": 2.3235092529129545e-06, "epoch": 3.8475928406773114, "total_flos": 3671841943077181440, "step": 956400 }, { "loss": 2.8625, "learning_rate": 2.3226981275170237e-06, "epoch": 3.847995140221506, "total_flos": 3672220921455974400, "step": 956500 }, { "loss": 2.88, "learning_rate": 2.3218870021210933e-06, "epoch": 3.8483974397657006, "total_flos": 3672611834196080640, "step": 956600 }, { "loss": 2.8575, "learning_rate": 2.3210758767251625e-06, "epoch": 3.8487997393098956, "total_flos": 3672986149304186880, "step": 956700 }, { "loss": 2.835, "learning_rate": 2.320264751329232e-06, "epoch": 3.8492020388540897, "total_flos": 3673357421070489600, "step": 956800 }, { "loss": 2.825, "learning_rate": 2.3194536259333013e-06, "epoch": 3.8496043383982848, "total_flos": 3673734471468349440, "step": 956900 }, { "loss": 2.86, "learning_rate": 2.318642500537371e-06, "epoch": 3.8500066379424793, "total_flos": 3674112844365527040, "step": 957000 }, { "loss": 2.885, "learning_rate": 2.31783137514144e-06, "epoch": 3.850408937486674, "total_flos": 3674491790876866560, "step": 957100 }, { "loss": 2.825, "learning_rate": 2.3170202497455098e-06, "epoch": 3.8508112370308685, "total_flos": 3674889788814120960, "step": 957200 }, { "loss": 2.8125, "learning_rate": 2.316209124349579e-06, "epoch": 3.851213536575063, "total_flos": 3675279198472673280, "step": 957300 }, { "loss": 2.87, "learning_rate": 2.3153979989536486e-06, "epoch": 3.8516158361192576, "total_flos": 3675650156875683840, "step": 957400 }, { "loss": 2.81, "learning_rate": 2.314586873557718e-06, "epoch": 3.852018135663452, "total_flos": 3676040076413491200, "step": 957500 }, { "loss": 2.8325, "learning_rate": 2.3137757481617874e-06, "epoch": 3.8524204352076468, "total_flos": 3676414003800913920, "step": 957600 }, { "loss": 2.8325, "learning_rate": 2.3129646227658566e-06, "epoch": 3.8528227347518413, "total_flos": 3676810010022328320, "step": 957700 }, { "loss": 2.8675, "learning_rate": 2.3121534973699262e-06, "epoch": 3.8532250342960364, "total_flos": 3677184893433354240, "step": 957800 }, { "loss": 2.8375, "learning_rate": 2.3113423719739954e-06, "epoch": 3.8536273338402305, "total_flos": 3677566166268794880, "step": 957900 }, { "loss": 2.8175, "learning_rate": 2.310531246578065e-06, "epoch": 3.8540296333844255, "total_flos": 3677944825973053440, "step": 958000 }, { "loss": 2.8375, "learning_rate": 2.3097201211821342e-06, "epoch": 3.85443193292862, "total_flos": 3678327129189488640, "step": 958100 }, { "loss": 2.7975, "learning_rate": 2.308908995786204e-06, "epoch": 3.8548342324728146, "total_flos": 3678700206778152960, "step": 958200 }, { "loss": 2.8175, "learning_rate": 2.308097870390273e-06, "epoch": 3.855236532017009, "total_flos": 3679076518913341440, "step": 958300 }, { "loss": 2.8525, "learning_rate": 2.3072867449943427e-06, "epoch": 3.855638831561204, "total_flos": 3679462964898723840, "step": 958400 }, { "loss": 2.835, "learning_rate": 2.306475619598412e-06, "epoch": 3.8560411311053984, "total_flos": 3679839144252856320, "step": 958500 }, { "loss": 2.7875, "learning_rate": 2.3056644942024815e-06, "epoch": 3.856443430649593, "total_flos": 3680212444913694720, "step": 958600 }, { "loss": 2.9175, "learning_rate": 2.3048533688065507e-06, "epoch": 3.856845730193788, "total_flos": 3680609545251010560, "step": 958700 }, { "loss": 2.8, "learning_rate": 2.3040422434106203e-06, "epoch": 3.857248029737982, "total_flos": 3680972951067555840, "step": 958800 }, { "loss": 2.8525, "learning_rate": 2.3032311180146895e-06, "epoch": 3.857650329282177, "total_flos": 3681360145938094080, "step": 958900 }, { "loss": 2.805, "learning_rate": 2.302419992618759e-06, "epoch": 3.8580526288263717, "total_flos": 3681741339104901120, "step": 959000 }, { "loss": 2.8275, "learning_rate": 2.3016088672228283e-06, "epoch": 3.8584549283705663, "total_flos": 3682147277349304320, "step": 959100 }, { "loss": 2.84, "learning_rate": 2.300797741826898e-06, "epoch": 3.858857227914761, "total_flos": 3682540309275064320, "step": 959200 }, { "loss": 2.87, "learning_rate": 2.299986616430967e-06, "epoch": 3.8592595274589554, "total_flos": 3682901325032601600, "step": 959300 }, { "loss": 2.855, "learning_rate": 2.2991754910350368e-06, "epoch": 3.85966182700315, "total_flos": 3683282688159160320, "step": 959400 }, { "loss": 2.8575, "learning_rate": 2.298364365639106e-06, "epoch": 3.8600641265473445, "total_flos": 3683661273506027520, "step": 959500 }, { "loss": 2.8175, "learning_rate": 2.2975532402431756e-06, "epoch": 3.860466426091539, "total_flos": 3684041367245690880, "step": 959600 }, { "loss": 2.8425, "learning_rate": 2.2967421148472448e-06, "epoch": 3.8608687256357337, "total_flos": 3684436635204433920, "step": 959700 }, { "loss": 2.8225, "learning_rate": 2.2959309894513144e-06, "epoch": 3.8612710251799287, "total_flos": 3684824823277271040, "step": 959800 }, { "loss": 2.8325, "learning_rate": 2.2951198640553836e-06, "epoch": 3.861673324724123, "total_flos": 3685208608330291200, "step": 959900 }, { "loss": 2.86, "learning_rate": 2.294308738659453e-06, "epoch": 3.862075624268318, "total_flos": 3685591586074490880, "step": 960000 }, { "loss": 2.835, "learning_rate": 2.2934976132635224e-06, "epoch": 3.8624779238125124, "total_flos": 3685979290824284160, "step": 960100 }, { "loss": 2.815, "learning_rate": 2.292686487867592e-06, "epoch": 3.862880223356707, "total_flos": 3686338362667161600, "step": 960200 }, { "loss": 2.8325, "learning_rate": 2.291875362471661e-06, "epoch": 3.8632825229009016, "total_flos": 3686725881523476480, "step": 960300 }, { "loss": 2.855, "learning_rate": 2.291064237075731e-06, "epoch": 3.863684822445096, "total_flos": 3687102862875187200, "step": 960400 }, { "loss": 2.865, "learning_rate": 2.2902531116798e-06, "epoch": 3.8640871219892907, "total_flos": 3687477783464908800, "step": 960500 }, { "loss": 2.8075, "learning_rate": 2.28944198628387e-06, "epoch": 3.8644894215334853, "total_flos": 3687872631835514880, "step": 960600 }, { "loss": 2.835, "learning_rate": 2.2886308608879393e-06, "epoch": 3.86489172107768, "total_flos": 3688254090564433920, "step": 960700 }, { "loss": 2.8025, "learning_rate": 2.287819735492009e-06, "epoch": 3.8652940206218744, "total_flos": 3688633977165649920, "step": 960800 }, { "loss": 2.8025, "learning_rate": 2.287008610096078e-06, "epoch": 3.8656963201660695, "total_flos": 3689018181806807040, "step": 960900 }, { "loss": 2.82, "learning_rate": 2.2861974847001477e-06, "epoch": 3.866098619710264, "total_flos": 3689413210759649280, "step": 961000 }, { "loss": 2.8125, "learning_rate": 2.285386359304217e-06, "epoch": 3.8665009192544586, "total_flos": 3689794616376145920, "step": 961100 }, { "loss": 2.795, "learning_rate": 2.2845752339082865e-06, "epoch": 3.866903218798653, "total_flos": 3690192587757189120, "step": 961200 }, { "loss": 2.8375, "learning_rate": 2.2837641085123557e-06, "epoch": 3.8673055183428477, "total_flos": 3690585385988290560, "step": 961300 }, { "loss": 2.7775, "learning_rate": 2.2829529831164253e-06, "epoch": 3.8677078178870423, "total_flos": 3690975751670446080, "step": 961400 }, { "loss": 2.8225, "learning_rate": 2.2821418577204945e-06, "epoch": 3.868110117431237, "total_flos": 3691349264780974080, "step": 961500 }, { "loss": 2.8525, "learning_rate": 2.281330732324564e-06, "epoch": 3.8685124169754315, "total_flos": 3691738079580395520, "step": 961600 }, { "loss": 2.805, "learning_rate": 2.2805196069286333e-06, "epoch": 3.868914716519626, "total_flos": 3692122385135155200, "step": 961700 }, { "loss": 2.815, "learning_rate": 2.279708481532703e-06, "epoch": 3.869317016063821, "total_flos": 3692502192067737600, "step": 961800 }, { "loss": 2.8275, "learning_rate": 2.278897356136772e-06, "epoch": 3.869719315608015, "total_flos": 3692884946739763200, "step": 961900 }, { "loss": 2.875, "learning_rate": 2.2780862307408418e-06, "epoch": 3.87012161515221, "total_flos": 3693273331328563200, "step": 962000 }, { "loss": 2.8325, "learning_rate": 2.277275105344911e-06, "epoch": 3.8705239146964048, "total_flos": 3693656999534254080, "step": 962100 }, { "loss": 2.83, "learning_rate": 2.2764639799489806e-06, "epoch": 3.8709262142405994, "total_flos": 3694038516686837760, "step": 962200 }, { "loss": 2.815, "learning_rate": 2.2756528545530498e-06, "epoch": 3.871328513784794, "total_flos": 3694413352296683520, "step": 962300 }, { "loss": 2.7875, "learning_rate": 2.2748417291571194e-06, "epoch": 3.8717308133289885, "total_flos": 3694789754722990080, "step": 962400 }, { "loss": 2.83, "learning_rate": 2.2740306037611886e-06, "epoch": 3.872133112873183, "total_flos": 3695178500476262400, "step": 962500 }, { "loss": 2.805, "learning_rate": 2.2732194783652582e-06, "epoch": 3.8725354124173776, "total_flos": 3695552374751262720, "step": 962600 }, { "loss": 2.79, "learning_rate": 2.2724083529693274e-06, "epoch": 3.872937711961572, "total_flos": 3695950930368952320, "step": 962700 }, { "loss": 2.8125, "learning_rate": 2.271597227573397e-06, "epoch": 3.873340011505767, "total_flos": 3696334710110730240, "step": 962800 }, { "loss": 2.8125, "learning_rate": 2.2707861021774662e-06, "epoch": 3.873742311049962, "total_flos": 3696724884588165120, "step": 962900 }, { "loss": 2.8475, "learning_rate": 2.269974976781536e-06, "epoch": 3.874144610594156, "total_flos": 3697101414484285440, "step": 963000 }, { "loss": 2.7975, "learning_rate": 2.269163851385605e-06, "epoch": 3.874546910138351, "total_flos": 3697494340185200640, "step": 963100 }, { "loss": 2.86, "learning_rate": 2.2683527259896747e-06, "epoch": 3.8749492096825455, "total_flos": 3697868724339456000, "step": 963200 }, { "loss": 2.79, "learning_rate": 2.267541600593744e-06, "epoch": 3.87535150922674, "total_flos": 3698251457766512640, "step": 963300 }, { "loss": 2.805, "learning_rate": 2.2667304751978135e-06, "epoch": 3.8757538087709347, "total_flos": 3698617301443246080, "step": 963400 }, { "loss": 2.785, "learning_rate": 2.2659193498018827e-06, "epoch": 3.8761561083151292, "total_flos": 3698996529450424320, "step": 963500 }, { "loss": 2.85, "learning_rate": 2.2651082244059523e-06, "epoch": 3.876558407859324, "total_flos": 3699377552657479680, "step": 963600 }, { "loss": 2.8475, "learning_rate": 2.2642970990100215e-06, "epoch": 3.8769607074035184, "total_flos": 3699764944043980800, "step": 963700 }, { "loss": 2.845, "learning_rate": 2.263485973614091e-06, "epoch": 3.877363006947713, "total_flos": 3700142127222896640, "step": 963800 }, { "loss": 2.7675, "learning_rate": 2.2626748482181603e-06, "epoch": 3.8777653064919075, "total_flos": 3700530219693373440, "step": 963900 }, { "loss": 2.8625, "learning_rate": 2.26186372282223e-06, "epoch": 3.8781676060361026, "total_flos": 3700926348073359360, "step": 964000 }, { "loss": 2.8, "learning_rate": 2.261052597426299e-06, "epoch": 3.878569905580297, "total_flos": 3701337331997890560, "step": 964100 }, { "loss": 2.8425, "learning_rate": 2.2602414720303687e-06, "epoch": 3.8789722051244917, "total_flos": 3701713734424197120, "step": 964200 }, { "loss": 2.83, "learning_rate": 2.259430346634438e-06, "epoch": 3.8793745046686863, "total_flos": 3702097083955353600, "step": 964300 }, { "loss": 2.815, "learning_rate": 2.2586192212385076e-06, "epoch": 3.879776804212881, "total_flos": 3702480284771727360, "step": 964400 }, { "loss": 2.835, "learning_rate": 2.2578080958425768e-06, "epoch": 3.8801791037570754, "total_flos": 3702852215132067840, "step": 964500 }, { "loss": 2.8, "learning_rate": 2.2569969704466464e-06, "epoch": 3.88058140330127, "total_flos": 3703234135939061760, "step": 964600 }, { "loss": 2.885, "learning_rate": 2.2561858450507156e-06, "epoch": 3.8809837028454646, "total_flos": 3703611292561766400, "step": 964700 }, { "loss": 2.785, "learning_rate": 2.255374719654785e-06, "epoch": 3.881386002389659, "total_flos": 3703991179162982400, "step": 964800 }, { "loss": 2.7975, "learning_rate": 2.2545635942588544e-06, "epoch": 3.881788301933854, "total_flos": 3704370210654197760, "step": 964900 }, { "loss": 2.845, "learning_rate": 2.253752468862924e-06, "epoch": 3.8821906014780483, "total_flos": 3704747154827212800, "step": 965000 }, { "loss": 2.8275, "learning_rate": 2.252941343466993e-06, "epoch": 3.8825929010222433, "total_flos": 3705122967705630720, "step": 965100 }, { "loss": 2.8025, "learning_rate": 2.252130218071063e-06, "epoch": 3.882995200566438, "total_flos": 3705485911444101120, "step": 965200 }, { "loss": 2.805, "learning_rate": 2.2513190926751324e-06, "epoch": 3.8833975001106324, "total_flos": 3705872622991595520, "step": 965300 }, { "loss": 2.8825, "learning_rate": 2.250507967279202e-06, "epoch": 3.883799799654827, "total_flos": 3706265771764684800, "step": 965400 }, { "loss": 2.7875, "learning_rate": 2.2496968418832713e-06, "epoch": 3.8842020991990216, "total_flos": 3706663780324423680, "step": 965500 }, { "loss": 2.8725, "learning_rate": 2.248885716487341e-06, "epoch": 3.884604398743216, "total_flos": 3707052356117944320, "step": 965600 }, { "loss": 2.8575, "learning_rate": 2.24807459109141e-06, "epoch": 3.8850066982874107, "total_flos": 3707443497241466880, "step": 965700 }, { "loss": 2.8325, "learning_rate": 2.2472634656954797e-06, "epoch": 3.8854089978316053, "total_flos": 3707815379800627200, "step": 965800 }, { "loss": 2.7575, "learning_rate": 2.246452340299549e-06, "epoch": 3.8858112973758, "total_flos": 3708206898022348800, "step": 965900 }, { "loss": 2.7825, "learning_rate": 2.2456412149036185e-06, "epoch": 3.886213596919995, "total_flos": 3708583183601326080, "step": 966000 }, { "loss": 2.8075, "learning_rate": 2.2448300895076877e-06, "epoch": 3.886615896464189, "total_flos": 3708968174306334720, "step": 966100 }, { "loss": 2.815, "learning_rate": 2.2440189641117573e-06, "epoch": 3.887018196008384, "total_flos": 3709367574411540480, "step": 966200 }, { "loss": 2.8225, "learning_rate": 2.2432078387158265e-06, "epoch": 3.8874204955525786, "total_flos": 3709756776931645440, "step": 966300 }, { "loss": 2.8425, "learning_rate": 2.242396713319896e-06, "epoch": 3.887822795096773, "total_flos": 3710136934406215680, "step": 966400 }, { "loss": 2.7975, "learning_rate": 2.2415855879239653e-06, "epoch": 3.8882250946409678, "total_flos": 3710521877310044160, "step": 966500 }, { "loss": 2.835, "learning_rate": 2.240774462528035e-06, "epoch": 3.8886273941851623, "total_flos": 3710902024162129920, "step": 966600 }, { "loss": 2.83, "learning_rate": 2.239963337132104e-06, "epoch": 3.889029693729357, "total_flos": 3711297759510190080, "step": 966700 }, { "loss": 2.8575, "learning_rate": 2.2391522117361738e-06, "epoch": 3.8894319932735515, "total_flos": 3711675627839354880, "step": 966800 }, { "loss": 2.81, "learning_rate": 2.238341086340243e-06, "epoch": 3.8898342928177465, "total_flos": 3712058260352808960, "step": 966900 }, { "loss": 2.8375, "learning_rate": 2.2375299609443126e-06, "epoch": 3.8902365923619406, "total_flos": 3712439124222597120, "step": 967000 }, { "loss": 2.795, "learning_rate": 2.2367188355483818e-06, "epoch": 3.8906388919061357, "total_flos": 3712820051827292160, "step": 967100 }, { "loss": 2.84, "learning_rate": 2.2359077101524514e-06, "epoch": 3.8910411914503302, "total_flos": 3713204835393853440, "step": 967200 }, { "loss": 2.835, "learning_rate": 2.2350965847565206e-06, "epoch": 3.891443490994525, "total_flos": 3713585523992647680, "step": 967300 }, { "loss": 2.86, "learning_rate": 2.23428545936059e-06, "epoch": 3.8918457905387194, "total_flos": 3713979363227228160, "step": 967400 }, { "loss": 2.8625, "learning_rate": 2.2334743339646594e-06, "epoch": 3.892248090082914, "total_flos": 3714368013378140160, "step": 967500 }, { "loss": 2.8475, "learning_rate": 2.232663208568729e-06, "epoch": 3.8926503896271085, "total_flos": 3714762867059988480, "step": 967600 }, { "loss": 2.8175, "learning_rate": 2.2318520831727982e-06, "epoch": 3.893052689171303, "total_flos": 3715143635327416320, "step": 967700 }, { "loss": 2.8725, "learning_rate": 2.231040957776868e-06, "epoch": 3.8934549887154977, "total_flos": 3715536380446095360, "step": 967800 }, { "loss": 2.7975, "learning_rate": 2.230229832380937e-06, "epoch": 3.8938572882596922, "total_flos": 3715918465901598720, "step": 967900 }, { "loss": 2.8325, "learning_rate": 2.2294187069850067e-06, "epoch": 3.8942595878038873, "total_flos": 3716302569629153280, "step": 968000 }, { "loss": 2.7725, "learning_rate": 2.228607581589076e-06, "epoch": 3.8946618873480814, "total_flos": 3716705305194485760, "step": 968100 }, { "loss": 2.7975, "learning_rate": 2.2277964561931455e-06, "epoch": 3.8950641868922764, "total_flos": 3717087815549368320, "step": 968200 }, { "loss": 2.8125, "learning_rate": 2.2269853307972147e-06, "epoch": 3.895466486436471, "total_flos": 3717460770979461120, "step": 968300 }, { "loss": 2.785, "learning_rate": 2.2261742054012843e-06, "epoch": 3.8958687859806655, "total_flos": 3717853739170314240, "step": 968400 }, { "loss": 2.9, "learning_rate": 2.2253630800053535e-06, "epoch": 3.89627108552486, "total_flos": 3718231203845068800, "step": 968500 }, { "loss": 2.835, "learning_rate": 2.224551954609423e-06, "epoch": 3.8966733850690547, "total_flos": 3718603575038515200, "step": 968600 }, { "loss": 2.76, "learning_rate": 2.2237408292134923e-06, "epoch": 3.8970756846132493, "total_flos": 3718996707877877760, "step": 968700 }, { "loss": 2.8625, "learning_rate": 2.222929703817562e-06, "epoch": 3.897477984157444, "total_flos": 3719365557717719040, "step": 968800 }, { "loss": 2.85, "learning_rate": 2.222118578421631e-06, "epoch": 3.8978802837016384, "total_flos": 3719749528664217600, "step": 968900 }, { "loss": 2.7925, "learning_rate": 2.2213074530257007e-06, "epoch": 3.898282583245833, "total_flos": 3720143612215941120, "step": 969000 }, { "loss": 2.8575, "learning_rate": 2.22049632762977e-06, "epoch": 3.898684882790028, "total_flos": 3720526234106910720, "step": 969100 }, { "loss": 2.8325, "learning_rate": 2.2196852022338395e-06, "epoch": 3.8990871823342226, "total_flos": 3720902116031477760, "step": 969200 }, { "loss": 2.815, "learning_rate": 2.2188740768379087e-06, "epoch": 3.899489481878417, "total_flos": 3721287584748288000, "step": 969300 }, { "loss": 2.83, "learning_rate": 2.2180629514419784e-06, "epoch": 3.8998917814226117, "total_flos": 3721672851637893120, "step": 969400 }, { "loss": 2.7625, "learning_rate": 2.2172518260460476e-06, "epoch": 3.9002940809668063, "total_flos": 3722052477988239360, "step": 969500 }, { "loss": 2.8225, "learning_rate": 2.216440700650117e-06, "epoch": 3.900696380511001, "total_flos": 3722418401333606400, "step": 969600 }, { "loss": 2.8375, "learning_rate": 2.2156295752541864e-06, "epoch": 3.9010986800551954, "total_flos": 3722800279650662400, "step": 969700 }, { "loss": 2.8725, "learning_rate": 2.214818449858256e-06, "epoch": 3.90150097959939, "total_flos": 3723175014346905600, "step": 969800 }, { "loss": 2.845, "learning_rate": 2.214007324462325e-06, "epoch": 3.9019032791435846, "total_flos": 3723544156305070080, "step": 969900 }, { "loss": 2.75, "learning_rate": 2.213196199066395e-06, "epoch": 3.9023055786877796, "total_flos": 3723930177391073280, "step": 970000 }, { "loss": 2.855, "learning_rate": 2.2123850736704644e-06, "epoch": 3.9027078782319737, "total_flos": 3724322067399751680, "step": 970100 }, { "loss": 2.7825, "learning_rate": 2.2115739482745336e-06, "epoch": 3.9031101777761688, "total_flos": 3724704370616186880, "step": 970200 }, { "loss": 2.86, "learning_rate": 2.2107628228786032e-06, "epoch": 3.9035124773203633, "total_flos": 3725101938342819840, "step": 970300 }, { "loss": 2.7575, "learning_rate": 2.2099516974826724e-06, "epoch": 3.903914776864558, "total_flos": 3725475499254528000, "step": 970400 }, { "loss": 2.8225, "learning_rate": 2.209140572086742e-06, "epoch": 3.9043170764087525, "total_flos": 3725873959269857280, "step": 970500 }, { "loss": 2.79, "learning_rate": 2.2083294466908113e-06, "epoch": 3.904719375952947, "total_flos": 3726250706926909440, "step": 970600 }, { "loss": 2.83, "learning_rate": 2.207518321294881e-06, "epoch": 3.9051216754971416, "total_flos": 3726626169263339520, "step": 970700 }, { "loss": 2.8575, "learning_rate": 2.20670719589895e-06, "epoch": 3.905523975041336, "total_flos": 3727015929463879680, "step": 970800 }, { "loss": 2.8575, "learning_rate": 2.2058960705030197e-06, "epoch": 3.9059262745855308, "total_flos": 3727392167241676800, "step": 970900 }, { "loss": 2.84, "learning_rate": 2.205084945107089e-06, "epoch": 3.9063285741297253, "total_flos": 3727766461104814080, "step": 971000 }, { "loss": 2.81, "learning_rate": 2.2042738197111585e-06, "epoch": 3.9067308736739204, "total_flos": 3728139671474534400, "step": 971100 }, { "loss": 2.77, "learning_rate": 2.203462694315228e-06, "epoch": 3.9071331732181145, "total_flos": 3728526791987681280, "step": 971200 }, { "loss": 2.84, "learning_rate": 2.2026515689192973e-06, "epoch": 3.9075354727623095, "total_flos": 3728924280045680640, "step": 971300 }, { "loss": 2.835, "learning_rate": 2.201840443523367e-06, "epoch": 3.907937772306504, "total_flos": 3729294595788380160, "step": 971400 }, { "loss": 2.7875, "learning_rate": 2.201029318127436e-06, "epoch": 3.9083400718506986, "total_flos": 3729684334743951360, "step": 971500 }, { "loss": 2.79, "learning_rate": 2.2002181927315058e-06, "epoch": 3.9087423713948932, "total_flos": 3730059032261498880, "step": 971600 }, { "loss": 2.8525, "learning_rate": 2.199407067335575e-06, "epoch": 3.909144670939088, "total_flos": 3730432471014635520, "step": 971700 }, { "loss": 2.8425, "learning_rate": 2.1985959419396446e-06, "epoch": 3.9095469704832824, "total_flos": 3730825672900147200, "step": 971800 }, { "loss": 2.8275, "learning_rate": 2.1977848165437138e-06, "epoch": 3.909949270027477, "total_flos": 3731208703756769280, "step": 971900 }, { "loss": 2.875, "learning_rate": 2.1969736911477834e-06, "epoch": 3.9103515695716715, "total_flos": 3731591931129354240, "step": 972000 }, { "loss": 2.865, "learning_rate": 2.1961625657518526e-06, "epoch": 3.910753869115866, "total_flos": 3731973103051192320, "step": 972100 }, { "loss": 2.76, "learning_rate": 2.195351440355922e-06, "epoch": 3.911156168660061, "total_flos": 3732356829680547840, "step": 972200 }, { "loss": 2.7975, "learning_rate": 2.1945403149599914e-06, "epoch": 3.9115584682042557, "total_flos": 3732749728825251840, "step": 972300 }, { "loss": 2.8125, "learning_rate": 2.193729189564061e-06, "epoch": 3.9119607677484503, "total_flos": 3733131065395599360, "step": 972400 }, { "loss": 2.835, "learning_rate": 2.19291806416813e-06, "epoch": 3.912363067292645, "total_flos": 3733516008299427840, "step": 972500 }, { "loss": 2.8325, "learning_rate": 2.1921069387722e-06, "epoch": 3.9127653668368394, "total_flos": 3733907154734192640, "step": 972600 }, { "loss": 2.795, "learning_rate": 2.191295813376269e-06, "epoch": 3.913167666381034, "total_flos": 3734295985467340800, "step": 972700 }, { "loss": 2.78, "learning_rate": 2.1904846879803386e-06, "epoch": 3.9135699659252285, "total_flos": 3734679568693155840, "step": 972800 }, { "loss": 2.8425, "learning_rate": 2.189673562584408e-06, "epoch": 3.913972265469423, "total_flos": 3735058557694433280, "step": 972900 }, { "loss": 2.835, "learning_rate": 2.1888624371884775e-06, "epoch": 3.9143745650136177, "total_flos": 3735445242685716480, "step": 973000 }, { "loss": 2.835, "learning_rate": 2.1880513117925467e-06, "epoch": 3.9147768645578127, "total_flos": 3735842651075082240, "step": 973100 }, { "loss": 2.8025, "learning_rate": 2.1872401863966163e-06, "epoch": 3.915179164102007, "total_flos": 3736238859123701760, "step": 973200 }, { "loss": 2.805, "learning_rate": 2.1864290610006855e-06, "epoch": 3.915581463646202, "total_flos": 3736626755078215680, "step": 973300 }, { "loss": 2.7825, "learning_rate": 2.185617935604755e-06, "epoch": 3.9159837631903964, "total_flos": 3737019192144844800, "step": 973400 }, { "loss": 2.8075, "learning_rate": 2.1848068102088243e-06, "epoch": 3.916386062734591, "total_flos": 3737401771545876480, "step": 973500 }, { "loss": 2.8375, "learning_rate": 2.183995684812894e-06, "epoch": 3.9167883622787856, "total_flos": 3737771529608140800, "step": 973600 }, { "loss": 2.805, "learning_rate": 2.183184559416963e-06, "epoch": 3.91719066182298, "total_flos": 3738175268998256640, "step": 973700 }, { "loss": 2.795, "learning_rate": 2.1823734340210327e-06, "epoch": 3.9175929613671747, "total_flos": 3738568152209233920, "step": 973800 }, { "loss": 2.835, "learning_rate": 2.181562308625102e-06, "epoch": 3.9179952609113693, "total_flos": 3738958119548221440, "step": 973900 }, { "loss": 2.8675, "learning_rate": 2.1807511832291715e-06, "epoch": 3.918397560455564, "total_flos": 3739353801783859200, "step": 974000 }, { "loss": 2.875, "learning_rate": 2.1799400578332407e-06, "epoch": 3.9187998599997584, "total_flos": 3739733555604019200, "step": 974100 }, { "loss": 2.8875, "learning_rate": 2.1791289324373103e-06, "epoch": 3.9192021595439535, "total_flos": 3740110972477593600, "step": 974200 }, { "loss": 2.8375, "learning_rate": 2.1783178070413795e-06, "epoch": 3.9196044590881476, "total_flos": 3740482265488865280, "step": 974300 }, { "loss": 2.8475, "learning_rate": 2.177506681645449e-06, "epoch": 3.9200067586323426, "total_flos": 3740851131262433280, "step": 974400 }, { "loss": 2.88, "learning_rate": 2.1766955562495184e-06, "epoch": 3.920409058176537, "total_flos": 3741226710446192640, "step": 974500 }, { "loss": 2.7975, "learning_rate": 2.175884430853588e-06, "epoch": 3.9208113577207317, "total_flos": 3741607786765670400, "step": 974600 }, { "loss": 2.845, "learning_rate": 2.175073305457657e-06, "epoch": 3.9212136572649263, "total_flos": 3741993712249313280, "step": 974700 }, { "loss": 2.845, "learning_rate": 2.174262180061727e-06, "epoch": 3.921615956809121, "total_flos": 3742385464165693440, "step": 974800 }, { "loss": 2.8, "learning_rate": 2.1734510546657964e-06, "epoch": 3.9220182563533155, "total_flos": 3742780742746920960, "step": 974900 }, { "loss": 2.8275, "learning_rate": 2.1726399292698656e-06, "epoch": 3.92242055589751, "total_flos": 3743168389073049600, "step": 975000 }, { "loss": 2.8425, "learning_rate": 2.1718288038739352e-06, "epoch": 3.922822855441705, "total_flos": 3743542852895938560, "step": 975100 }, { "loss": 2.8025, "learning_rate": 2.1710176784780044e-06, "epoch": 3.923225154985899, "total_flos": 3743928937716848640, "step": 975200 }, { "loss": 2.83, "learning_rate": 2.170206553082074e-06, "epoch": 3.923627454530094, "total_flos": 3744304415987005440, "step": 975300 }, { "loss": 2.8325, "learning_rate": 2.1693954276861432e-06, "epoch": 3.924029754074289, "total_flos": 3744681885973002240, "step": 975400 }, { "loss": 2.7725, "learning_rate": 2.168584302290213e-06, "epoch": 3.9244320536184834, "total_flos": 3745079448388392960, "step": 975500 }, { "loss": 2.83, "learning_rate": 2.167773176894282e-06, "epoch": 3.924834353162678, "total_flos": 3745461932187064320, "step": 975600 }, { "loss": 2.8075, "learning_rate": 2.1669620514983517e-06, "epoch": 3.9252366527068725, "total_flos": 3745831599958210560, "step": 975700 }, { "loss": 2.82, "learning_rate": 2.166150926102421e-06, "epoch": 3.925638952251067, "total_flos": 3746207120718305280, "step": 975800 }, { "loss": 2.84, "learning_rate": 2.1653398007064905e-06, "epoch": 3.9260412517952616, "total_flos": 3746595861160335360, "step": 975900 }, { "loss": 2.765, "learning_rate": 2.1645286753105597e-06, "epoch": 3.926443551339456, "total_flos": 3746989148025722880, "step": 976000 }, { "loss": 2.8525, "learning_rate": 2.1637175499146293e-06, "epoch": 3.926845850883651, "total_flos": 3747380225414338560, "step": 976100 }, { "loss": 2.82, "learning_rate": 2.1629064245186985e-06, "epoch": 3.927248150427846, "total_flos": 3747747837734737920, "step": 976200 }, { "loss": 2.835, "learning_rate": 2.162095299122768e-06, "epoch": 3.92765044997204, "total_flos": 3748128712227010560, "step": 976300 }, { "loss": 2.8225, "learning_rate": 2.1612841737268373e-06, "epoch": 3.928052749516235, "total_flos": 3748516794075002880, "step": 976400 }, { "loss": 2.8325, "learning_rate": 2.160473048330907e-06, "epoch": 3.9284550490604295, "total_flos": 3748904530692249600, "step": 976500 }, { "loss": 2.7925, "learning_rate": 2.159661922934976e-06, "epoch": 3.928857348604624, "total_flos": 3749287179139430400, "step": 976600 }, { "loss": 2.8125, "learning_rate": 2.1588507975390458e-06, "epoch": 3.9292596481488187, "total_flos": 3749686547377182720, "step": 976700 }, { "loss": 2.825, "learning_rate": 2.1580396721431154e-06, "epoch": 3.9296619476930132, "total_flos": 3750067522783057920, "step": 976800 }, { "loss": 2.8525, "learning_rate": 2.1572285467471846e-06, "epoch": 3.930064247237208, "total_flos": 3750446867637565440, "step": 976900 }, { "loss": 2.815, "learning_rate": 2.156417421351254e-06, "epoch": 3.9304665467814024, "total_flos": 3750814442779269120, "step": 977000 }, { "loss": 2.84, "learning_rate": 2.1556062959553234e-06, "epoch": 3.930868846325597, "total_flos": 3751199656556451840, "step": 977100 }, { "loss": 2.82, "learning_rate": 2.154795170559393e-06, "epoch": 3.9312711458697915, "total_flos": 3751583515966863360, "step": 977200 }, { "loss": 2.8475, "learning_rate": 2.153984045163462e-06, "epoch": 3.9316734454139866, "total_flos": 3751983755248343040, "step": 977300 }, { "loss": 2.87, "learning_rate": 2.153172919767532e-06, "epoch": 3.9320757449581807, "total_flos": 3752374189976647680, "step": 977400 }, { "loss": 2.85, "learning_rate": 2.152361794371601e-06, "epoch": 3.9324780445023757, "total_flos": 3752764778730977280, "step": 977500 }, { "loss": 2.8525, "learning_rate": 2.1515506689756706e-06, "epoch": 3.9328803440465703, "total_flos": 3753155972966922240, "step": 977600 }, { "loss": 2.83, "learning_rate": 2.15073954357974e-06, "epoch": 3.933282643590765, "total_flos": 3753530734219376640, "step": 977700 }, { "loss": 2.8225, "learning_rate": 2.1499284181838094e-06, "epoch": 3.9336849431349594, "total_flos": 3753915873639168000, "step": 977800 }, { "loss": 2.7525, "learning_rate": 2.1491172927878786e-06, "epoch": 3.934087242679154, "total_flos": 3754303562455234560, "step": 977900 }, { "loss": 2.8425, "learning_rate": 2.1483061673919483e-06, "epoch": 3.9344895422233486, "total_flos": 3754685775380551680, "step": 978000 }, { "loss": 2.8325, "learning_rate": 2.1474950419960175e-06, "epoch": 3.934891841767543, "total_flos": 3755063558729840640, "step": 978100 }, { "loss": 2.8175, "learning_rate": 2.146683916600087e-06, "epoch": 3.935294141311738, "total_flos": 3755447439385221120, "step": 978200 }, { "loss": 2.8275, "learning_rate": 2.1458727912041563e-06, "epoch": 3.9356964408559323, "total_flos": 3755808295805491200, "step": 978300 }, { "loss": 2.8775, "learning_rate": 2.145061665808226e-06, "epoch": 3.9360987404001273, "total_flos": 3756189016271738880, "step": 978400 }, { "loss": 2.835, "learning_rate": 2.144250540412295e-06, "epoch": 3.936501039944322, "total_flos": 3756574607147120640, "step": 978500 }, { "loss": 2.845, "learning_rate": 2.1434394150163647e-06, "epoch": 3.9369033394885165, "total_flos": 3756954339722311680, "step": 978600 }, { "loss": 2.835, "learning_rate": 2.142628289620434e-06, "epoch": 3.937305639032711, "total_flos": 3757332415189923840, "step": 978700 }, { "loss": 2.8625, "learning_rate": 2.1418171642245035e-06, "epoch": 3.9377079385769056, "total_flos": 3757719567570524160, "step": 978800 }, { "loss": 2.7575, "learning_rate": 2.1410060388285727e-06, "epoch": 3.9381102381211, "total_flos": 3758109588021934080, "step": 978900 }, { "loss": 2.84, "learning_rate": 2.1401949134326423e-06, "epoch": 3.9385125376652947, "total_flos": 3758502959867197440, "step": 979000 }, { "loss": 2.8425, "learning_rate": 2.1393837880367115e-06, "epoch": 3.9389148372094893, "total_flos": 3758873573039462400, "step": 979100 }, { "loss": 2.8075, "learning_rate": 2.138572662640781e-06, "epoch": 3.939317136753684, "total_flos": 3759271108898641920, "step": 979200 }, { "loss": 2.805, "learning_rate": 2.1377615372448503e-06, "epoch": 3.939719436297879, "total_flos": 3759671666854656000, "step": 979300 }, { "loss": 2.82, "learning_rate": 2.13695041184892e-06, "epoch": 3.940121735842073, "total_flos": 3760049397091522560, "step": 979400 }, { "loss": 2.8175, "learning_rate": 2.136139286452989e-06, "epoch": 3.940524035386268, "total_flos": 3760423127962982400, "step": 979500 }, { "loss": 2.83, "learning_rate": 2.1353281610570588e-06, "epoch": 3.9409263349304626, "total_flos": 3760820616020981760, "step": 979600 }, { "loss": 2.8275, "learning_rate": 2.1345170356611284e-06, "epoch": 3.941328634474657, "total_flos": 3761212559142082560, "step": 979700 }, { "loss": 2.8725, "learning_rate": 2.1337059102651976e-06, "epoch": 3.9417309340188518, "total_flos": 3761602383077529600, "step": 979800 }, { "loss": 2.805, "learning_rate": 2.1328947848692672e-06, "epoch": 3.9421332335630463, "total_flos": 3761973622976378880, "step": 979900 }, { "loss": 2.82, "learning_rate": 2.1320836594733364e-06, "epoch": 3.942535533107241, "total_flos": 3762363940857354240, "step": 980000 }, { "loss": 2.8325, "learning_rate": 2.131272534077406e-06, "epoch": 3.9429378326514355, "total_flos": 3762758130633922560, "step": 980100 }, { "loss": 2.83, "learning_rate": 2.1304614086814752e-06, "epoch": 3.94334013219563, "total_flos": 3763143918025267200, "step": 980200 }, { "loss": 2.8, "learning_rate": 2.129650283285545e-06, "epoch": 3.9437424317398246, "total_flos": 3763529402675804160, "step": 980300 }, { "loss": 2.8225, "learning_rate": 2.128839157889614e-06, "epoch": 3.9441447312840197, "total_flos": 3763896016482662400, "step": 980400 }, { "loss": 2.8075, "learning_rate": 2.1280280324936837e-06, "epoch": 3.9445470308282142, "total_flos": 3764288145497241600, "step": 980500 }, { "loss": 2.8325, "learning_rate": 2.127216907097753e-06, "epoch": 3.944949330372409, "total_flos": 3764663416628951040, "step": 980600 }, { "loss": 2.8375, "learning_rate": 2.1264057817018225e-06, "epoch": 3.9453516299166034, "total_flos": 3765029934833448960, "step": 980700 }, { "loss": 2.795, "learning_rate": 2.1255946563058917e-06, "epoch": 3.945753929460798, "total_flos": 3765420311138088960, "step": 980800 }, { "loss": 2.83, "learning_rate": 2.1247835309099613e-06, "epoch": 3.9461562290049925, "total_flos": 3765821926031308800, "step": 980900 }, { "loss": 2.82, "learning_rate": 2.1239724055140305e-06, "epoch": 3.946558528549187, "total_flos": 3766198779913205760, "step": 981000 }, { "loss": 2.84, "learning_rate": 2.1231612801181e-06, "epoch": 3.9469608280933817, "total_flos": 3766580599806597120, "step": 981100 }, { "loss": 2.7825, "learning_rate": 2.1223501547221693e-06, "epoch": 3.9473631276375762, "total_flos": 3766968161152849920, "step": 981200 }, { "loss": 2.8275, "learning_rate": 2.121539029326239e-06, "epoch": 3.9477654271817713, "total_flos": 3767352695091025920, "step": 981300 }, { "loss": 2.8325, "learning_rate": 2.120727903930308e-06, "epoch": 3.9481677267259654, "total_flos": 3767756785023129600, "step": 981400 }, { "loss": 2.7825, "learning_rate": 2.1199167785343777e-06, "epoch": 3.9485700262701604, "total_flos": 3768140012395714560, "step": 981500 }, { "loss": 2.8025, "learning_rate": 2.119105653138447e-06, "epoch": 3.948972325814355, "total_flos": 3768517344289413120, "step": 981600 }, { "loss": 2.8275, "learning_rate": 2.1182945277425166e-06, "epoch": 3.9493746253585496, "total_flos": 3768893210280253440, "step": 981700 }, { "loss": 2.795, "learning_rate": 2.1174834023465858e-06, "epoch": 3.949776924902744, "total_flos": 3769284585098434560, "step": 981800 }, { "loss": 2.8, "learning_rate": 2.1166722769506554e-06, "epoch": 3.9501792244469387, "total_flos": 3769660339553187840, "step": 981900 }, { "loss": 2.84, "learning_rate": 2.1158611515547246e-06, "epoch": 3.9505815239911333, "total_flos": 3770031404181043200, "step": 982000 }, { "loss": 2.8625, "learning_rate": 2.115050026158794e-06, "epoch": 3.950983823535328, "total_flos": 3770425540845189120, "step": 982100 }, { "loss": 2.82, "learning_rate": 2.1142389007628634e-06, "epoch": 3.9513861230795224, "total_flos": 3770809570215352320, "step": 982200 }, { "loss": 2.775, "learning_rate": 2.113427775366933e-06, "epoch": 3.951788422623717, "total_flos": 3771203489118566400, "step": 982300 }, { "loss": 2.855, "learning_rate": 2.1126166499710026e-06, "epoch": 3.952190722167912, "total_flos": 3771586689934940160, "step": 982400 }, { "loss": 2.83, "learning_rate": 2.111805524575072e-06, "epoch": 3.952593021712106, "total_flos": 3771976827233679360, "step": 982500 }, { "loss": 2.8175, "learning_rate": 2.1109943991791414e-06, "epoch": 3.952995321256301, "total_flos": 3772374124086958080, "step": 982600 }, { "loss": 2.8675, "learning_rate": 2.1101832737832106e-06, "epoch": 3.9533976208004957, "total_flos": 3772749634224568320, "step": 982700 }, { "loss": 2.8175, "learning_rate": 2.1093721483872802e-06, "epoch": 3.9537999203446903, "total_flos": 3773123333228574720, "step": 982800 }, { "loss": 2.8075, "learning_rate": 2.1085610229913494e-06, "epoch": 3.954202219888885, "total_flos": 3773499448847800320, "step": 982900 }, { "loss": 2.885, "learning_rate": 2.107749897595419e-06, "epoch": 3.9546045194330794, "total_flos": 3773870911818823680, "step": 983000 }, { "loss": 2.7825, "learning_rate": 2.1069387721994883e-06, "epoch": 3.955006818977274, "total_flos": 3774248594254510080, "step": 983100 }, { "loss": 2.8525, "learning_rate": 2.106127646803558e-06, "epoch": 3.9554091185214686, "total_flos": 3774647495102945280, "step": 983200 }, { "loss": 2.7925, "learning_rate": 2.105316521407627e-06, "epoch": 3.955811418065663, "total_flos": 3775033500255221760, "step": 983300 }, { "loss": 2.82, "learning_rate": 2.1045053960116967e-06, "epoch": 3.9562137176098577, "total_flos": 3775419016773212160, "step": 983400 }, { "loss": 2.81, "learning_rate": 2.103694270615766e-06, "epoch": 3.9566160171540528, "total_flos": 3775833421137745920, "step": 983500 }, { "loss": 2.7825, "learning_rate": 2.1028831452198355e-06, "epoch": 3.9570183166982473, "total_flos": 3776224270142945280, "step": 983600 }, { "loss": 2.84, "learning_rate": 2.1020720198239047e-06, "epoch": 3.957420616242442, "total_flos": 3776612463527024640, "step": 983700 }, { "loss": 2.8825, "learning_rate": 2.1012608944279743e-06, "epoch": 3.9578229157866365, "total_flos": 3777010063121111040, "step": 983800 }, { "loss": 2.81, "learning_rate": 2.1004497690320435e-06, "epoch": 3.958225215330831, "total_flos": 3777392010484316160, "step": 983900 }, { "loss": 2.8575, "learning_rate": 2.099638643636113e-06, "epoch": 3.9586275148750256, "total_flos": 3777782896668211200, "step": 984000 }, { "loss": 2.815, "learning_rate": 2.0988275182401823e-06, "epoch": 3.95902981441922, "total_flos": 3778175285933660160, "step": 984100 }, { "loss": 2.83, "learning_rate": 2.098016392844252e-06, "epoch": 3.9594321139634148, "total_flos": 3778553759744440320, "step": 984200 }, { "loss": 2.7925, "learning_rate": 2.0972052674483216e-06, "epoch": 3.9598344135076093, "total_flos": 3778939409043486720, "step": 984300 }, { "loss": 2.8175, "learning_rate": 2.0963941420523908e-06, "epoch": 3.9602367130518044, "total_flos": 3779320028596131840, "step": 984400 }, { "loss": 2.815, "learning_rate": 2.0955830166564604e-06, "epoch": 3.9606390125959985, "total_flos": 3779707919239403520, "step": 984500 }, { "loss": 2.785, "learning_rate": 2.0947718912605296e-06, "epoch": 3.9610413121401935, "total_flos": 3780092840898263040, "step": 984600 }, { "loss": 2.79, "learning_rate": 2.093960765864599e-06, "epoch": 3.961443611684388, "total_flos": 3780481565406566400, "step": 984700 }, { "loss": 2.83, "learning_rate": 2.0931496404686684e-06, "epoch": 3.9618459112285827, "total_flos": 3780851689944545280, "step": 984800 }, { "loss": 2.8225, "learning_rate": 2.092338515072738e-06, "epoch": 3.9622482107727772, "total_flos": 3781241158026762240, "step": 984900 }, { "loss": 2.795, "learning_rate": 2.0915273896768072e-06, "epoch": 3.962650510316972, "total_flos": 3781640127921346560, "step": 985000 }, { "loss": 2.7675, "learning_rate": 2.090716264280877e-06, "epoch": 3.9630528098611664, "total_flos": 3782028624046233600, "step": 985100 }, { "loss": 2.8675, "learning_rate": 2.089905138884946e-06, "epoch": 3.963455109405361, "total_flos": 3782409854391736320, "step": 985200 }, { "loss": 2.805, "learning_rate": 2.0890940134890157e-06, "epoch": 3.9638574089495555, "total_flos": 3782798015908362240, "step": 985300 }, { "loss": 2.8325, "learning_rate": 2.088282888093085e-06, "epoch": 3.96425970849375, "total_flos": 3783192439379589120, "step": 985400 }, { "loss": 2.82, "learning_rate": 2.0874717626971545e-06, "epoch": 3.964662008037945, "total_flos": 3783581838415656960, "step": 985500 }, { "loss": 2.7825, "learning_rate": 2.0866606373012237e-06, "epoch": 3.9650643075821392, "total_flos": 3783962766020352000, "step": 985600 }, { "loss": 2.82, "learning_rate": 2.0858495119052933e-06, "epoch": 3.9654666071263343, "total_flos": 3784349466945361920, "step": 985700 }, { "loss": 2.8625, "learning_rate": 2.0850383865093625e-06, "epoch": 3.965868906670529, "total_flos": 3784730298947696640, "step": 985800 }, { "loss": 2.8, "learning_rate": 2.084227261113432e-06, "epoch": 3.9662712062147234, "total_flos": 3785111890457671680, "step": 985900 }, { "loss": 2.8275, "learning_rate": 2.0834161357175013e-06, "epoch": 3.966673505758918, "total_flos": 3785491357470750720, "step": 986000 }, { "loss": 2.8125, "learning_rate": 2.082605010321571e-06, "epoch": 3.9670758053031125, "total_flos": 3785860122330716160, "step": 986100 }, { "loss": 2.805, "learning_rate": 2.08179388492564e-06, "epoch": 3.967478104847307, "total_flos": 3786252639065978880, "step": 986200 }, { "loss": 2.8275, "learning_rate": 2.0809827595297097e-06, "epoch": 3.9678804043915017, "total_flos": 3786638612350801920, "step": 986300 }, { "loss": 2.8375, "learning_rate": 2.080171634133779e-06, "epoch": 3.9682827039356967, "total_flos": 3787011700561950720, "step": 986400 }, { "loss": 2.8175, "learning_rate": 2.0793605087378485e-06, "epoch": 3.968685003479891, "total_flos": 3787394641127454720, "step": 986500 }, { "loss": 2.7925, "learning_rate": 2.0785493833419177e-06, "epoch": 3.969087303024086, "total_flos": 3787775276613826560, "step": 986600 }, { "loss": 2.795, "learning_rate": 2.0777382579459874e-06, "epoch": 3.9694896025682804, "total_flos": 3788153038718146560, "step": 986700 }, { "loss": 2.825, "learning_rate": 2.0769271325500566e-06, "epoch": 3.969891902112475, "total_flos": 3788524294550722560, "step": 986800 }, { "loss": 2.8025, "learning_rate": 2.076116007154126e-06, "epoch": 3.9702942016566696, "total_flos": 3788894169460316160, "step": 986900 }, { "loss": 2.825, "learning_rate": 2.0753048817581954e-06, "epoch": 3.970696501200864, "total_flos": 3789273105349171200, "step": 987000 }, { "loss": 2.8225, "learning_rate": 2.074493756362265e-06, "epoch": 3.9710988007450587, "total_flos": 3789658797138155520, "step": 987100 }, { "loss": 2.8025, "learning_rate": 2.073682630966334e-06, "epoch": 3.9715011002892533, "total_flos": 3790043012401797120, "step": 987200 }, { "loss": 2.855, "learning_rate": 2.072871505570404e-06, "epoch": 3.971903399833448, "total_flos": 3790426855878481920, "step": 987300 }, { "loss": 2.75, "learning_rate": 2.072060380174473e-06, "epoch": 3.9723056993776424, "total_flos": 3790820881006540800, "step": 987400 }, { "loss": 2.81, "learning_rate": 2.0712492547785426e-06, "epoch": 3.9727079989218375, "total_flos": 3791226442152744960, "step": 987500 }, { "loss": 2.805, "learning_rate": 2.070438129382612e-06, "epoch": 3.9731102984660316, "total_flos": 3791614901098936320, "step": 987600 }, { "loss": 2.83, "learning_rate": 2.0696270039866814e-06, "epoch": 3.9735125980102266, "total_flos": 3792011220683642880, "step": 987700 }, { "loss": 2.795, "learning_rate": 2.0688158785907506e-06, "epoch": 3.973914897554421, "total_flos": 3792385721685227520, "step": 987800 }, { "loss": 2.8175, "learning_rate": 2.0680047531948202e-06, "epoch": 3.9743171970986158, "total_flos": 3792768646317004800, "step": 987900 }, { "loss": 2.77, "learning_rate": 2.0671936277988894e-06, "epoch": 3.9747194966428103, "total_flos": 3793142297519831040, "step": 988000 }, { "loss": 2.8325, "learning_rate": 2.066382502402959e-06, "epoch": 3.975121796187005, "total_flos": 3793531372570122240, "step": 988100 }, { "loss": 2.78, "learning_rate": 2.0655713770070287e-06, "epoch": 3.9755240957311995, "total_flos": 3793902670892636160, "step": 988200 }, { "loss": 2.765, "learning_rate": 2.064760251611098e-06, "epoch": 3.975926395275394, "total_flos": 3794285101578885120, "step": 988300 }, { "loss": 2.85, "learning_rate": 2.0639491262151675e-06, "epoch": 3.9763286948195886, "total_flos": 3794667027697121280, "step": 988400 }, { "loss": 2.805, "learning_rate": 2.0631380008192367e-06, "epoch": 3.976730994363783, "total_flos": 3795057159684618240, "step": 988500 }, { "loss": 2.785, "learning_rate": 2.0623268754233063e-06, "epoch": 3.977133293907978, "total_flos": 3795446112576337920, "step": 988600 }, { "loss": 2.8475, "learning_rate": 2.0615157500273755e-06, "epoch": 3.977535593452173, "total_flos": 3795836589794580480, "step": 988700 }, { "loss": 2.7725, "learning_rate": 2.060704624631445e-06, "epoch": 3.9779378929963674, "total_flos": 3796219126705674240, "step": 988800 }, { "loss": 2.865, "learning_rate": 2.0598934992355143e-06, "epoch": 3.978340192540562, "total_flos": 3796605647048448000, "step": 988900 }, { "loss": 2.78, "learning_rate": 2.059082373839584e-06, "epoch": 3.9787424920847565, "total_flos": 3796991673445693440, "step": 989000 }, { "loss": 2.79, "learning_rate": 2.0582712484436536e-06, "epoch": 3.979144791628951, "total_flos": 3797385826043566080, "step": 989100 }, { "loss": 2.76, "learning_rate": 2.0574601230477228e-06, "epoch": 3.9795470911731456, "total_flos": 3797753077199493120, "step": 989200 }, { "loss": 2.8125, "learning_rate": 2.0566489976517924e-06, "epoch": 3.97994939071734, "total_flos": 3798138864590837760, "step": 989300 }, { "loss": 2.845, "learning_rate": 2.0558378722558616e-06, "epoch": 3.980351690261535, "total_flos": 3798514167590000640, "step": 989400 }, { "loss": 2.8175, "learning_rate": 2.055026746859931e-06, "epoch": 3.98075398980573, "total_flos": 3798899689419233280, "step": 989500 }, { "loss": 2.8125, "learning_rate": 2.0542156214640004e-06, "epoch": 3.981156289349924, "total_flos": 3799284664190515200, "step": 989600 }, { "loss": 2.795, "learning_rate": 2.05340449606807e-06, "epoch": 3.981558588894119, "total_flos": 3799672836329625600, "step": 989700 }, { "loss": 2.825, "learning_rate": 2.052593370672139e-06, "epoch": 3.9819608884383135, "total_flos": 3800042727172945920, "step": 989800 }, { "loss": 2.815, "learning_rate": 2.051782245276209e-06, "epoch": 3.982363187982508, "total_flos": 3800433305304791040, "step": 989900 }, { "loss": 2.82, "learning_rate": 2.050971119880278e-06, "epoch": 3.9827654875267027, "total_flos": 3800817871110420480, "step": 990000 }, { "loss": 2.84, "learning_rate": 2.0501599944843476e-06, "epoch": 3.9831677870708972, "total_flos": 3801188022204610560, "step": 990100 }, { "loss": 2.875, "learning_rate": 2.049348869088417e-06, "epoch": 3.983570086615092, "total_flos": 3801581680856954880, "step": 990200 }, { "loss": 2.8175, "learning_rate": 2.0485377436924865e-06, "epoch": 3.9839723861592864, "total_flos": 3801960356494940160, "step": 990300 }, { "loss": 2.87, "learning_rate": 2.0477266182965557e-06, "epoch": 3.984374685703481, "total_flos": 3802338761259571200, "step": 990400 }, { "loss": 2.8325, "learning_rate": 2.0469154929006253e-06, "epoch": 3.9847769852476755, "total_flos": 3802737237208627200, "step": 990500 }, { "loss": 2.81, "learning_rate": 2.0461043675046945e-06, "epoch": 3.9851792847918706, "total_flos": 3803119821920901120, "step": 990600 }, { "loss": 2.8, "learning_rate": 2.045293242108764e-06, "epoch": 3.9855815843360647, "total_flos": 3803495741024163840, "step": 990700 }, { "loss": 2.805, "learning_rate": 2.0444821167128333e-06, "epoch": 3.9859838838802597, "total_flos": 3803872409012582400, "step": 990800 }, { "loss": 2.74, "learning_rate": 2.043670991316903e-06, "epoch": 3.9863861834244543, "total_flos": 3804240340007516160, "step": 990900 }, { "loss": 2.7825, "learning_rate": 2.042859865920972e-06, "epoch": 3.986788482968649, "total_flos": 3804609476654438400, "step": 991000 }, { "loss": 2.795, "learning_rate": 2.0420487405250417e-06, "epoch": 3.9871907825128434, "total_flos": 3805000261924730880, "step": 991100 }, { "loss": 2.825, "learning_rate": 2.041237615129111e-06, "epoch": 3.987593082057038, "total_flos": 3805386612307752960, "step": 991200 }, { "loss": 2.8, "learning_rate": 2.0404264897331805e-06, "epoch": 3.9879953816012326, "total_flos": 3805781821842831360, "step": 991300 }, { "loss": 2.8125, "learning_rate": 2.0396153643372497e-06, "epoch": 3.988397681145427, "total_flos": 3806165691875727360, "step": 991400 }, { "loss": 2.8, "learning_rate": 2.0388042389413193e-06, "epoch": 3.9887999806896217, "total_flos": 3806554235801794560, "step": 991500 }, { "loss": 2.8325, "learning_rate": 2.0379931135453885e-06, "epoch": 3.9892022802338163, "total_flos": 3806925874043811840, "step": 991600 }, { "loss": 2.84, "learning_rate": 2.037181988149458e-06, "epoch": 3.9896045797780113, "total_flos": 3807321492544542720, "step": 991700 }, { "loss": 2.7975, "learning_rate": 2.0363708627535274e-06, "epoch": 3.990006879322206, "total_flos": 3807698309247744000, "step": 991800 }, { "loss": 2.83, "learning_rate": 2.035559737357597e-06, "epoch": 3.9904091788664005, "total_flos": 3808093741854996480, "step": 991900 }, { "loss": 2.8175, "learning_rate": 2.034748611961666e-06, "epoch": 3.990811478410595, "total_flos": 3808476974538823680, "step": 992000 }, { "loss": 2.765, "learning_rate": 2.033937486565736e-06, "epoch": 3.9912137779547896, "total_flos": 3808864573063772160, "step": 992100 }, { "loss": 2.815, "learning_rate": 2.033126361169805e-06, "epoch": 3.991616077498984, "total_flos": 3809240863953991680, "step": 992200 }, { "loss": 2.8075, "learning_rate": 2.0323152357738746e-06, "epoch": 3.9920183770431787, "total_flos": 3809642871879137280, "step": 992300 }, { "loss": 2.8, "learning_rate": 2.031504110377944e-06, "epoch": 3.9924206765873733, "total_flos": 3810040556453099520, "step": 992400 }, { "loss": 2.875, "learning_rate": 2.0306929849820134e-06, "epoch": 3.992822976131568, "total_flos": 3810421085714626560, "step": 992500 }, { "loss": 2.7925, "learning_rate": 2.0298818595860826e-06, "epoch": 3.993225275675763, "total_flos": 3810805635586529280, "step": 992600 }, { "loss": 2.815, "learning_rate": 2.0290707341901522e-06, "epoch": 3.993627575219957, "total_flos": 3811201466536949760, "step": 992700 }, { "loss": 2.78, "learning_rate": 2.0282596087942214e-06, "epoch": 3.994029874764152, "total_flos": 3811595029586933760, "step": 992800 }, { "loss": 2.845, "learning_rate": 2.027448483398291e-06, "epoch": 3.9944321743083466, "total_flos": 3811989075959961600, "step": 992900 }, { "loss": 2.7525, "learning_rate": 2.0266373580023603e-06, "epoch": 3.994834473852541, "total_flos": 3812376971914475520, "step": 993000 }, { "loss": 2.78, "learning_rate": 2.02582623260643e-06, "epoch": 3.9952367733967358, "total_flos": 3812748201190840320, "step": 993100 }, { "loss": 2.8675, "learning_rate": 2.025015107210499e-06, "epoch": 3.9956390729409303, "total_flos": 3813142321921259520, "step": 993200 }, { "loss": 2.825, "learning_rate": 2.0242039818145687e-06, "epoch": 3.996041372485125, "total_flos": 3813526829303224320, "step": 993300 }, { "loss": 2.81, "learning_rate": 2.023392856418638e-06, "epoch": 3.9964436720293195, "total_flos": 3813901946408908800, "step": 993400 }, { "loss": 2.7975, "learning_rate": 2.0225817310227075e-06, "epoch": 3.996845971573514, "total_flos": 3814285147225282560, "step": 993500 }, { "loss": 2.7875, "learning_rate": 2.0217706056267767e-06, "epoch": 3.9972482711177086, "total_flos": 3814674562195077120, "step": 993600 }, { "loss": 2.8475, "learning_rate": 2.0209594802308463e-06, "epoch": 3.9976505706619037, "total_flos": 3815070170073323520, "step": 993700 }, { "loss": 2.765, "learning_rate": 2.020148354834916e-06, "epoch": 3.998052870206098, "total_flos": 3815443555714037760, "step": 993800 }, { "loss": 2.76, "learning_rate": 2.0193372294389856e-06, "epoch": 3.998455169750293, "total_flos": 3815832322712279040, "step": 993900 }, { "loss": 2.8225, "learning_rate": 2.0185261040430547e-06, "epoch": 3.9988574692944874, "total_flos": 3816243476596561920, "step": 994000 }, { "loss": 2.81, "learning_rate": 2.0177149786471244e-06, "epoch": 3.999259768838682, "total_flos": 3816636949355427840, "step": 994100 }, { "loss": 2.7925, "learning_rate": 2.0169038532511936e-06, "epoch": 3.9996620683828765, "total_flos": 3817028483510876160, "step": 994200 }, { "loss": 2.8125, "learning_rate": 2.016092727855263e-06, "epoch": 4.000064367927071, "total_flos": 3817417838729195520, "step": 994300 }, { "loss": 2.815, "learning_rate": 2.0152816024593324e-06, "epoch": 4.000466667471266, "total_flos": 3817804948619857920, "step": 994400 }, { "loss": 2.8175, "learning_rate": 2.014470477063402e-06, "epoch": 4.00086896701546, "total_flos": 3818176268187340800, "step": 994500 }, { "loss": 2.7775, "learning_rate": 2.013659351667471e-06, "epoch": 4.001271266559655, "total_flos": 3818554115271536640, "step": 994600 }, { "loss": 2.7625, "learning_rate": 2.012848226271541e-06, "epoch": 4.001673566103849, "total_flos": 3818925312680448000, "step": 994700 }, { "loss": 2.85, "learning_rate": 2.01203710087561e-06, "epoch": 4.002075865648044, "total_flos": 3819293073715630080, "step": 994800 }, { "loss": 2.7575, "learning_rate": 2.0112259754796796e-06, "epoch": 4.0024781651922385, "total_flos": 3819685059326668800, "step": 994900 }, { "loss": 2.7925, "learning_rate": 2.010414850083749e-06, "epoch": 4.0028804647364336, "total_flos": 3820077846935285760, "step": 995000 }, { "loss": 2.7675, "learning_rate": 2.0096037246878184e-06, "epoch": 4.003282764280628, "total_flos": 3820458992300912640, "step": 995100 }, { "loss": 2.7625, "learning_rate": 2.0087925992918876e-06, "epoch": 4.003685063824823, "total_flos": 3820843605907722240, "step": 995200 }, { "loss": 2.84, "learning_rate": 2.0079814738959573e-06, "epoch": 4.004087363369018, "total_flos": 3821218404338872320, "step": 995300 }, { "loss": 2.79, "learning_rate": 2.0071703485000265e-06, "epoch": 4.004489662913212, "total_flos": 3821592730069463040, "step": 995400 }, { "loss": 2.7975, "learning_rate": 2.006359223104096e-06, "epoch": 4.004891962457407, "total_flos": 3821989841029263360, "step": 995500 }, { "loss": 2.8125, "learning_rate": 2.0055480977081653e-06, "epoch": 4.005294262001601, "total_flos": 3822364920956252160, "step": 995600 }, { "loss": 2.8175, "learning_rate": 2.004736972312235e-06, "epoch": 4.005696561545796, "total_flos": 3822740192087961600, "step": 995700 }, { "loss": 2.79, "learning_rate": 2.003925846916304e-06, "epoch": 4.00609886108999, "total_flos": 3823130658683719680, "step": 995800 }, { "loss": 2.7875, "learning_rate": 2.0031147215203737e-06, "epoch": 4.006501160634185, "total_flos": 3823523016081715200, "step": 995900 }, { "loss": 2.7975, "learning_rate": 2.002303596124443e-06, "epoch": 4.006903460178379, "total_flos": 3823932852777922560, "step": 996000 }, { "loss": 2.8225, "learning_rate": 2.0014924707285125e-06, "epoch": 4.007305759722574, "total_flos": 3824331658023997440, "step": 996100 }, { "loss": 2.7725, "learning_rate": 2.0006813453325817e-06, "epoch": 4.007708059266768, "total_flos": 3824710110589808640, "step": 996200 }, { "loss": 2.7875, "learning_rate": 1.9998702199366513e-06, "epoch": 4.0081103588109634, "total_flos": 3825091250644193280, "step": 996300 }, { "loss": 2.815, "learning_rate": 1.9990590945407205e-06, "epoch": 4.0085126583551585, "total_flos": 3825458836408381440, "step": 996400 }, { "loss": 2.8125, "learning_rate": 1.99824796914479e-06, "epoch": 4.008914957899353, "total_flos": 3825816527328276480, "step": 996500 }, { "loss": 2.79, "learning_rate": 1.9974368437488593e-06, "epoch": 4.009317257443548, "total_flos": 3826208852858818560, "step": 996600 }, { "loss": 2.7925, "learning_rate": 1.996625718352929e-06, "epoch": 4.009719556987742, "total_flos": 3826587841860096000, "step": 996700 }, { "loss": 2.775, "learning_rate": 1.995814592956998e-06, "epoch": 4.010121856531937, "total_flos": 3826973921369763840, "step": 996800 }, { "loss": 2.8125, "learning_rate": 1.9950034675610678e-06, "epoch": 4.010524156076131, "total_flos": 3827354471876259840, "step": 996900 }, { "loss": 2.875, "learning_rate": 1.994192342165137e-06, "epoch": 4.010926455620326, "total_flos": 3827755162613329920, "step": 997000 }, { "loss": 2.7875, "learning_rate": 1.9933812167692066e-06, "epoch": 4.01132875516452, "total_flos": 3828132244878643200, "step": 997100 }, { "loss": 2.855, "learning_rate": 1.992570091373276e-06, "epoch": 4.011731054708715, "total_flos": 3828525457386639360, "step": 997200 }, { "loss": 2.7975, "learning_rate": 1.9917589659773454e-06, "epoch": 4.01213335425291, "total_flos": 3828919333799915520, "step": 997300 }, { "loss": 2.7975, "learning_rate": 1.9909478405814146e-06, "epoch": 4.012535653797104, "total_flos": 3829294222522183680, "step": 997400 }, { "loss": 2.82, "learning_rate": 1.9901367151854842e-06, "epoch": 4.012937953341299, "total_flos": 3829675989303152640, "step": 997500 }, { "loss": 2.8175, "learning_rate": 1.9893255897895534e-06, "epoch": 4.013340252885493, "total_flos": 3830064788168847360, "step": 997600 }, { "loss": 2.825, "learning_rate": 1.988514464393623e-06, "epoch": 4.013742552429688, "total_flos": 3830451786523422720, "step": 997700 }, { "loss": 2.8125, "learning_rate": 1.9877033389976922e-06, "epoch": 4.0141448519738825, "total_flos": 3830833346165944320, "step": 997800 }, { "loss": 2.8025, "learning_rate": 1.986892213601762e-06, "epoch": 4.0145471515180775, "total_flos": 3831237845063700480, "step": 997900 }, { "loss": 2.8425, "learning_rate": 1.986081088205831e-06, "epoch": 4.014949451062272, "total_flos": 3831639146593628160, "step": 998000 }, { "loss": 2.805, "learning_rate": 1.9852699628099007e-06, "epoch": 4.015351750606467, "total_flos": 3832015564953661440, "step": 998100 }, { "loss": 2.835, "learning_rate": 1.98445883741397e-06, "epoch": 4.015754050150661, "total_flos": 3832406190886686720, "step": 998200 }, { "loss": 2.8825, "learning_rate": 1.9836477120180395e-06, "epoch": 4.016156349694856, "total_flos": 3832796402542817280, "step": 998300 }, { "loss": 2.805, "learning_rate": 1.9828365866221087e-06, "epoch": 4.016558649239051, "total_flos": 3833171928614154240, "step": 998400 }, { "loss": 2.795, "learning_rate": 1.9820254612261787e-06, "epoch": 4.016960948783245, "total_flos": 3833567902968115200, "step": 998500 }, { "loss": 2.8375, "learning_rate": 1.981214335830248e-06, "epoch": 4.01736324832744, "total_flos": 3833960833980272640, "step": 998600 }, { "loss": 2.79, "learning_rate": 1.9804032104343175e-06, "epoch": 4.017765547871634, "total_flos": 3834340826806333440, "step": 998700 }, { "loss": 2.8175, "learning_rate": 1.9795920850383867e-06, "epoch": 4.018167847415829, "total_flos": 3834724314429788160, "step": 998800 }, { "loss": 2.8425, "learning_rate": 1.9787809596424564e-06, "epoch": 4.018570146960023, "total_flos": 3835098411776962560, "step": 998900 }, { "loss": 2.7825, "learning_rate": 1.9779698342465256e-06, "epoch": 4.018972446504218, "total_flos": 3835470697990533120, "step": 999000 }, { "loss": 2.79, "learning_rate": 1.977158708850595e-06, "epoch": 4.019374746048412, "total_flos": 3835851338788147200, "step": 999100 }, { "loss": 2.8225, "learning_rate": 1.9763475834546644e-06, "epoch": 4.019777045592607, "total_flos": 3836243781166018560, "step": 999200 }, { "loss": 2.7825, "learning_rate": 1.975536458058734e-06, "epoch": 4.0201793451368015, "total_flos": 3836627465305436160, "step": 999300 }, { "loss": 2.8, "learning_rate": 1.974725332662803e-06, "epoch": 4.0205816446809965, "total_flos": 3837032829935677440, "step": 999400 }, { "loss": 2.81, "learning_rate": 1.973914207266873e-06, "epoch": 4.020983944225192, "total_flos": 3837420986141061120, "step": 999500 }, { "loss": 2.8575, "learning_rate": 1.973103081870942e-06, "epoch": 4.021386243769386, "total_flos": 3837803188443893760, "step": 999600 }, { "loss": 2.7825, "learning_rate": 1.9722919564750116e-06, "epoch": 4.021788543313581, "total_flos": 3838190324890767360, "step": 999700 }, { "loss": 2.77, "learning_rate": 1.971480831079081e-06, "epoch": 4.022190842857775, "total_flos": 3838553693528616960, "step": 999800 }, { "loss": 2.7775, "learning_rate": 1.9706697056831504e-06, "epoch": 4.02259314240197, "total_flos": 3838925618577715200, "step": 999900 }, { "loss": 2.775, "learning_rate": 1.9698585802872196e-06, "epoch": 4.022995441946164, "total_flos": 3839305861032161280, "step": 1000000 }, { "loss": 2.8125, "learning_rate": 1.9690474548912892e-06, "epoch": 4.023397741490359, "total_flos": 3839680335477534720, "step": 1000100 }, { "loss": 2.8175, "learning_rate": 1.9682363294953584e-06, "epoch": 4.023800041034553, "total_flos": 3840068741311303680, "step": 1000200 }, { "loss": 2.815, "learning_rate": 1.967425204099428e-06, "epoch": 4.024202340578748, "total_flos": 3840452000551342080, "step": 1000300 }, { "loss": 2.79, "learning_rate": 1.9666140787034973e-06, "epoch": 4.024604640122943, "total_flos": 3840835201367715840, "step": 1000400 }, { "loss": 2.84, "learning_rate": 1.965802953307567e-06, "epoch": 4.025006939667137, "total_flos": 3841216309554647040, "step": 1000500 }, { "loss": 2.815, "learning_rate": 1.964991827911636e-06, "epoch": 4.025409239211332, "total_flos": 3841602994545930240, "step": 1000600 }, { "loss": 2.8425, "learning_rate": 1.9641807025157057e-06, "epoch": 4.025811538755526, "total_flos": 3841982700564910080, "step": 1000700 }, { "loss": 2.7925, "learning_rate": 1.963369577119775e-06, "epoch": 4.0262138382997215, "total_flos": 3842364228339978240, "step": 1000800 }, { "loss": 2.825, "learning_rate": 1.9625584517238445e-06, "epoch": 4.026616137843916, "total_flos": 3842747567248650240, "step": 1000900 }, { "loss": 2.8025, "learning_rate": 1.9617473263279137e-06, "epoch": 4.027018437388111, "total_flos": 3843132781025832960, "step": 1001000 }, { "loss": 2.8375, "learning_rate": 1.9609362009319833e-06, "epoch": 4.027420736932305, "total_flos": 3843506225090211840, "step": 1001100 }, { "loss": 2.795, "learning_rate": 1.9601250755360525e-06, "epoch": 4.0278230364765, "total_flos": 3843884789192110080, "step": 1001200 }, { "loss": 2.83, "learning_rate": 1.959313950140122e-06, "epoch": 4.028225336020694, "total_flos": 3844247355832381440, "step": 1001300 }, { "loss": 2.8, "learning_rate": 1.9585028247441913e-06, "epoch": 4.028627635564889, "total_flos": 3844630381377761280, "step": 1001400 }, { "loss": 2.795, "learning_rate": 1.957691699348261e-06, "epoch": 4.029029935109084, "total_flos": 3845017724963082240, "step": 1001500 }, { "loss": 2.8, "learning_rate": 1.95688057395233e-06, "epoch": 4.029432234653278, "total_flos": 3845403055587594240, "step": 1001600 }, { "loss": 2.7725, "learning_rate": 1.9560694485563998e-06, "epoch": 4.029834534197473, "total_flos": 3845789931783598080, "step": 1001700 }, { "loss": 2.8325, "learning_rate": 1.955258323160469e-06, "epoch": 4.030236833741667, "total_flos": 3846167051227607040, "step": 1001800 }, { "loss": 2.79, "learning_rate": 1.9544471977645386e-06, "epoch": 4.030639133285862, "total_flos": 3846545179807641600, "step": 1001900 }, { "loss": 2.7275, "learning_rate": 1.9536360723686078e-06, "epoch": 4.031041432830056, "total_flos": 3846921916842209280, "step": 1002000 }, { "loss": 2.8175, "learning_rate": 1.9528249469726774e-06, "epoch": 4.031443732374251, "total_flos": 3847311708910202880, "step": 1002100 }, { "loss": 2.81, "learning_rate": 1.9520138215767466e-06, "epoch": 4.0318460319184455, "total_flos": 3847674833230909440, "step": 1002200 }, { "loss": 2.775, "learning_rate": 1.9512026961808162e-06, "epoch": 4.0322483314626405, "total_flos": 3848049382033674240, "step": 1002300 }, { "loss": 2.7625, "learning_rate": 1.9503915707848854e-06, "epoch": 4.032650631006835, "total_flos": 3848436831843840000, "step": 1002400 }, { "loss": 2.765, "learning_rate": 1.949580445388955e-06, "epoch": 4.03305293055103, "total_flos": 3848819581204623360, "step": 1002500 }, { "loss": 2.8, "learning_rate": 1.9487693199930242e-06, "epoch": 4.033455230095225, "total_flos": 3849197571692359680, "step": 1002600 }, { "loss": 2.8825, "learning_rate": 1.947958194597094e-06, "epoch": 4.033857529639419, "total_flos": 3849572391368478720, "step": 1002700 }, { "loss": 2.815, "learning_rate": 1.947147069201163e-06, "epoch": 4.034259829183614, "total_flos": 3849953770428764160, "step": 1002800 }, { "loss": 2.8325, "learning_rate": 1.9463359438052327e-06, "epoch": 4.034662128727808, "total_flos": 3850335441607372800, "step": 1002900 }, { "loss": 2.8575, "learning_rate": 1.945524818409302e-06, "epoch": 4.035064428272003, "total_flos": 3850722232823500800, "step": 1003000 }, { "loss": 2.835, "learning_rate": 1.9447136930133715e-06, "epoch": 4.035466727816197, "total_flos": 3851107595315466240, "step": 1003100 }, { "loss": 2.76, "learning_rate": 1.9439025676174407e-06, "epoch": 4.035869027360392, "total_flos": 3851499166649610240, "step": 1003200 }, { "loss": 2.805, "learning_rate": 1.9430914422215107e-06, "epoch": 4.036271326904586, "total_flos": 3851887811489280000, "step": 1003300 }, { "loss": 2.7725, "learning_rate": 1.94228031682558e-06, "epoch": 4.036673626448781, "total_flos": 3852257144652165120, "step": 1003400 }, { "loss": 2.7875, "learning_rate": 1.9414691914296495e-06, "epoch": 4.037075925992976, "total_flos": 3852642114112204800, "step": 1003500 }, { "loss": 2.79, "learning_rate": 1.9406580660337187e-06, "epoch": 4.03747822553717, "total_flos": 3853021506767892480, "step": 1003600 }, { "loss": 2.8425, "learning_rate": 1.9398469406377883e-06, "epoch": 4.037880525081365, "total_flos": 3853412812539924480, "step": 1003700 }, { "loss": 2.785, "learning_rate": 1.9390358152418575e-06, "epoch": 4.0382828246255595, "total_flos": 3853814353075752960, "step": 1003800 }, { "loss": 2.735, "learning_rate": 1.938224689845927e-06, "epoch": 4.038685124169755, "total_flos": 3854189294910443520, "step": 1003900 }, { "loss": 2.835, "learning_rate": 1.9374135644499964e-06, "epoch": 4.039087423713949, "total_flos": 3854570721771909120, "step": 1004000 }, { "loss": 2.8425, "learning_rate": 1.936602439054066e-06, "epoch": 4.039489723258144, "total_flos": 3854953168391884800, "step": 1004100 }, { "loss": 2.78, "learning_rate": 1.935791313658135e-06, "epoch": 4.039892022802338, "total_flos": 3855356636908646400, "step": 1004200 }, { "loss": 2.825, "learning_rate": 1.934980188262205e-06, "epoch": 4.040294322346533, "total_flos": 3855742992602910720, "step": 1004300 }, { "loss": 2.83, "learning_rate": 1.934169062866274e-06, "epoch": 4.040696621890727, "total_flos": 3856123155388723200, "step": 1004400 }, { "loss": 2.8525, "learning_rate": 1.9333579374703436e-06, "epoch": 4.041098921434922, "total_flos": 3856510190921994240, "step": 1004500 }, { "loss": 2.8375, "learning_rate": 1.932546812074413e-06, "epoch": 4.041501220979117, "total_flos": 3856889695113768960, "step": 1004600 }, { "loss": 2.815, "learning_rate": 1.9317356866784824e-06, "epoch": 4.041903520523311, "total_flos": 3857280974329589760, "step": 1004700 }, { "loss": 2.855, "learning_rate": 1.9309245612825516e-06, "epoch": 4.042305820067506, "total_flos": 3857659628722606080, "step": 1004800 }, { "loss": 2.81, "learning_rate": 1.9301134358866212e-06, "epoch": 4.0427081196117, "total_flos": 3858033237435494400, "step": 1004900 }, { "loss": 2.8075, "learning_rate": 1.9293023104906904e-06, "epoch": 4.043110419155895, "total_flos": 3858401688932167680, "step": 1005000 }, { "loss": 2.85, "learning_rate": 1.92849118509476e-06, "epoch": 4.043512718700089, "total_flos": 3858779939670773760, "step": 1005100 }, { "loss": 2.825, "learning_rate": 1.9276800596988292e-06, "epoch": 4.0439150182442845, "total_flos": 3859168233968455680, "step": 1005200 }, { "loss": 2.8175, "learning_rate": 1.926868934302899e-06, "epoch": 4.044317317788479, "total_flos": 3859548768541224960, "step": 1005300 }, { "loss": 2.8425, "learning_rate": 1.926057808906968e-06, "epoch": 4.044719617332674, "total_flos": 3859925133788835840, "step": 1005400 }, { "loss": 2.83, "learning_rate": 1.9252466835110377e-06, "epoch": 4.045121916876869, "total_flos": 3860325872327086080, "step": 1005500 }, { "loss": 2.7975, "learning_rate": 1.924435558115107e-06, "epoch": 4.045524216421063, "total_flos": 3860710007922094080, "step": 1005600 }, { "loss": 2.85, "learning_rate": 1.9236244327191765e-06, "epoch": 4.045926515965258, "total_flos": 3861094924269711360, "step": 1005700 }, { "loss": 2.7625, "learning_rate": 1.9228133073232457e-06, "epoch": 4.046328815509452, "total_flos": 3861472537659248640, "step": 1005800 }, { "loss": 2.8275, "learning_rate": 1.9220021819273153e-06, "epoch": 4.046731115053647, "total_flos": 3861853884852080640, "step": 1005900 }, { "loss": 2.7925, "learning_rate": 1.9211910565313845e-06, "epoch": 4.047133414597841, "total_flos": 3862230058894970880, "step": 1006000 }, { "loss": 2.8275, "learning_rate": 1.920379931135454e-06, "epoch": 4.047535714142036, "total_flos": 3862632226157383680, "step": 1006100 }, { "loss": 2.8275, "learning_rate": 1.9195688057395233e-06, "epoch": 4.04793801368623, "total_flos": 3863020589501214720, "step": 1006200 }, { "loss": 2.775, "learning_rate": 1.918757680343593e-06, "epoch": 4.048340313230425, "total_flos": 3863392403014225920, "step": 1006300 }, { "loss": 2.8575, "learning_rate": 1.917946554947662e-06, "epoch": 4.048742612774619, "total_flos": 3863777717705011200, "step": 1006400 }, { "loss": 2.8075, "learning_rate": 1.9171354295517318e-06, "epoch": 4.049144912318814, "total_flos": 3864170069791764480, "step": 1006500 }, { "loss": 2.785, "learning_rate": 1.916324304155801e-06, "epoch": 4.049547211863009, "total_flos": 3864559962773360640, "step": 1006600 }, { "loss": 2.7525, "learning_rate": 1.9155131787598706e-06, "epoch": 4.0499495114072035, "total_flos": 3864952415773716480, "step": 1006700 }, { "loss": 2.775, "learning_rate": 1.9147020533639398e-06, "epoch": 4.0503518109513985, "total_flos": 3865327516945674240, "step": 1006800 }, { "loss": 2.83, "learning_rate": 1.9138909279680094e-06, "epoch": 4.050754110495593, "total_flos": 3865698767467008000, "step": 1006900 }, { "loss": 2.82, "learning_rate": 1.9130798025720786e-06, "epoch": 4.051156410039788, "total_flos": 3866090397224816640, "step": 1007000 }, { "loss": 2.8425, "learning_rate": 1.912268677176148e-06, "epoch": 4.051558709583982, "total_flos": 3866467580403732480, "step": 1007100 }, { "loss": 2.7775, "learning_rate": 1.9114575517802174e-06, "epoch": 4.051961009128177, "total_flos": 3866865636764651520, "step": 1007200 }, { "loss": 2.7975, "learning_rate": 1.910646426384287e-06, "epoch": 4.052363308672371, "total_flos": 3867246537813135360, "step": 1007300 }, { "loss": 2.815, "learning_rate": 1.9098353009883562e-06, "epoch": 4.052765608216566, "total_flos": 3867631124863733760, "step": 1007400 }, { "loss": 2.8225, "learning_rate": 1.909024175592426e-06, "epoch": 4.05316790776076, "total_flos": 3868013008492032000, "step": 1007500 }, { "loss": 2.8175, "learning_rate": 1.908213050196495e-06, "epoch": 4.053570207304955, "total_flos": 3868388189332623360, "step": 1007600 }, { "loss": 2.785, "learning_rate": 1.9074019248005647e-06, "epoch": 4.05397250684915, "total_flos": 3868773360619868160, "step": 1007700 }, { "loss": 2.8725, "learning_rate": 1.906590799404634e-06, "epoch": 4.054374806393344, "total_flos": 3869151526378598400, "step": 1007800 }, { "loss": 2.805, "learning_rate": 1.9057796740087035e-06, "epoch": 4.054777105937539, "total_flos": 3869536479904911360, "step": 1007900 }, { "loss": 2.8025, "learning_rate": 1.904968548612773e-06, "epoch": 4.055179405481733, "total_flos": 3869926808408371200, "step": 1008000 }, { "loss": 2.745, "learning_rate": 1.9041574232168425e-06, "epoch": 4.055581705025928, "total_flos": 3870317524632514560, "step": 1008100 }, { "loss": 2.7925, "learning_rate": 1.903346297820912e-06, "epoch": 4.0559840045701225, "total_flos": 3870712288023244800, "step": 1008200 }, { "loss": 2.83, "learning_rate": 1.9025351724249813e-06, "epoch": 4.0563863041143176, "total_flos": 3871097453999247360, "step": 1008300 }, { "loss": 2.82, "learning_rate": 1.9017240470290507e-06, "epoch": 4.056788603658512, "total_flos": 3871484909120655360, "step": 1008400 }, { "loss": 2.81, "learning_rate": 1.9009129216331201e-06, "epoch": 4.057190903202707, "total_flos": 3871883432870891520, "step": 1008500 }, { "loss": 2.8125, "learning_rate": 1.9001017962371895e-06, "epoch": 4.057593202746902, "total_flos": 3872268790051614720, "step": 1008600 }, { "loss": 2.785, "learning_rate": 1.899290670841259e-06, "epoch": 4.057995502291096, "total_flos": 3872662916093276160, "step": 1008700 }, { "loss": 2.815, "learning_rate": 1.8984795454453283e-06, "epoch": 4.058397801835291, "total_flos": 3873047736838533120, "step": 1008800 }, { "loss": 2.79, "learning_rate": 1.8976684200493978e-06, "epoch": 4.058800101379485, "total_flos": 3873445633862184960, "step": 1008900 }, { "loss": 2.7775, "learning_rate": 1.8968572946534672e-06, "epoch": 4.05920240092368, "total_flos": 3873822227493212160, "step": 1009000 }, { "loss": 2.805, "learning_rate": 1.8960461692575366e-06, "epoch": 4.059604700467874, "total_flos": 3874189983217152000, "step": 1009100 }, { "loss": 2.8425, "learning_rate": 1.895235043861606e-06, "epoch": 4.060007000012069, "total_flos": 3874573364615761920, "step": 1009200 }, { "loss": 2.81, "learning_rate": 1.8944239184656754e-06, "epoch": 4.060409299556263, "total_flos": 3874958519969280000, "step": 1009300 }, { "loss": 2.8025, "learning_rate": 1.8936127930697448e-06, "epoch": 4.060811599100458, "total_flos": 3875350803009884160, "step": 1009400 }, { "loss": 2.7675, "learning_rate": 1.8928016676738142e-06, "epoch": 4.061213898644652, "total_flos": 3875724018690846720, "step": 1009500 }, { "loss": 2.8925, "learning_rate": 1.8919905422778836e-06, "epoch": 4.0616161981888474, "total_flos": 3876116227374059520, "step": 1009600 }, { "loss": 2.77, "learning_rate": 1.891179416881953e-06, "epoch": 4.0620184977330425, "total_flos": 3876495848413163520, "step": 1009700 }, { "loss": 2.85, "learning_rate": 1.8903682914860224e-06, "epoch": 4.062420797277237, "total_flos": 3876887637508239360, "step": 1009800 }, { "loss": 2.83, "learning_rate": 1.8895571660900918e-06, "epoch": 4.062823096821432, "total_flos": 3877264151470632960, "step": 1009900 }, { "loss": 2.8275, "learning_rate": 1.8887460406941612e-06, "epoch": 4.063225396365626, "total_flos": 3877641998554828800, "step": 1010000 }, { "loss": 2.795, "learning_rate": 1.8879349152982306e-06, "epoch": 4.063627695909821, "total_flos": 3878033155612078080, "step": 1010100 }, { "loss": 2.805, "learning_rate": 1.8871237899023e-06, "epoch": 4.064029995454015, "total_flos": 3878408692305899520, "step": 1010200 }, { "loss": 2.7675, "learning_rate": 1.8863126645063695e-06, "epoch": 4.06443229499821, "total_flos": 3878794453141032960, "step": 1010300 }, { "loss": 2.86, "learning_rate": 1.8855015391104389e-06, "epoch": 4.064834594542404, "total_flos": 3879193640796549120, "step": 1010400 }, { "loss": 2.7725, "learning_rate": 1.8846904137145083e-06, "epoch": 4.065236894086599, "total_flos": 3879576714143109120, "step": 1010500 }, { "loss": 2.8175, "learning_rate": 1.8838792883185777e-06, "epoch": 4.065639193630793, "total_flos": 3879959410391470080, "step": 1010600 }, { "loss": 2.7725, "learning_rate": 1.883068162922647e-06, "epoch": 4.066041493174988, "total_flos": 3880348666023997440, "step": 1010700 }, { "loss": 2.8475, "learning_rate": 1.8822570375267165e-06, "epoch": 4.066443792719183, "total_flos": 3880723050178252800, "step": 1010800 }, { "loss": 2.8275, "learning_rate": 1.881445912130786e-06, "epoch": 4.066846092263377, "total_flos": 3881099978417541120, "step": 1010900 }, { "loss": 2.78, "learning_rate": 1.8806347867348553e-06, "epoch": 4.067248391807572, "total_flos": 3881468525516574720, "step": 1011000 }, { "loss": 2.84, "learning_rate": 1.8798236613389247e-06, "epoch": 4.0676506913517665, "total_flos": 3881855284865249280, "step": 1011100 }, { "loss": 2.8125, "learning_rate": 1.8790125359429941e-06, "epoch": 4.0680529908959615, "total_flos": 3882225473138135040, "step": 1011200 }, { "loss": 2.7725, "learning_rate": 1.8782014105470635e-06, "epoch": 4.068455290440156, "total_flos": 3882613783369543680, "step": 1011300 }, { "loss": 2.8125, "learning_rate": 1.877390285151133e-06, "epoch": 4.068857589984351, "total_flos": 3883001514675548160, "step": 1011400 }, { "loss": 2.775, "learning_rate": 1.8765791597552024e-06, "epoch": 4.069259889528545, "total_flos": 3883383998474219520, "step": 1011500 }, { "loss": 2.7925, "learning_rate": 1.8757680343592718e-06, "epoch": 4.06966218907274, "total_flos": 3883769036980408320, "step": 1011600 }, { "loss": 2.785, "learning_rate": 1.8749569089633412e-06, "epoch": 4.070064488616935, "total_flos": 3884146241404293120, "step": 1011700 }, { "loss": 2.7875, "learning_rate": 1.8741457835674106e-06, "epoch": 4.070466788161129, "total_flos": 3884544855445647360, "step": 1011800 }, { "loss": 2.8225, "learning_rate": 1.87333465817148e-06, "epoch": 4.070869087705324, "total_flos": 3884931444834570240, "step": 1011900 }, { "loss": 2.8025, "learning_rate": 1.8725235327755494e-06, "epoch": 4.071271387249518, "total_flos": 3885318639705108480, "step": 1012000 }, { "loss": 2.82, "learning_rate": 1.8717124073796188e-06, "epoch": 4.071673686793713, "total_flos": 3885690049563709440, "step": 1012100 }, { "loss": 2.79, "learning_rate": 1.8709012819836882e-06, "epoch": 4.072075986337907, "total_flos": 3886069729026478080, "step": 1012200 }, { "loss": 2.8025, "learning_rate": 1.8700901565877578e-06, "epoch": 4.072478285882102, "total_flos": 3886465028852674560, "step": 1012300 }, { "loss": 2.8475, "learning_rate": 1.8692790311918272e-06, "epoch": 4.072880585426296, "total_flos": 3886861534330859520, "step": 1012400 }, { "loss": 2.83, "learning_rate": 1.8684679057958966e-06, "epoch": 4.073282884970491, "total_flos": 3887245962044190720, "step": 1012500 }, { "loss": 2.8075, "learning_rate": 1.867656780399966e-06, "epoch": 4.0736851845146855, "total_flos": 3887635706311004160, "step": 1012600 }, { "loss": 2.77, "learning_rate": 1.8668456550040357e-06, "epoch": 4.0740874840588805, "total_flos": 3888026342866513920, "step": 1012700 }, { "loss": 2.835, "learning_rate": 1.866034529608105e-06, "epoch": 4.074489783603076, "total_flos": 3888425036576501760, "step": 1012800 }, { "loss": 2.7975, "learning_rate": 1.8652234042121745e-06, "epoch": 4.07489208314727, "total_flos": 3888799680981626880, "step": 1012900 }, { "loss": 2.775, "learning_rate": 1.8644122788162439e-06, "epoch": 4.075294382691465, "total_flos": 3889187008633221120, "step": 1013000 }, { "loss": 2.7575, "learning_rate": 1.8636011534203133e-06, "epoch": 4.075696682235659, "total_flos": 3889568892261519360, "step": 1013100 }, { "loss": 2.785, "learning_rate": 1.8627900280243827e-06, "epoch": 4.076098981779854, "total_flos": 3889966964556165120, "step": 1013200 }, { "loss": 2.79, "learning_rate": 1.8619789026284521e-06, "epoch": 4.076501281324048, "total_flos": 3890341014102159360, "step": 1013300 }, { "loss": 2.8125, "learning_rate": 1.8611677772325215e-06, "epoch": 4.076903580868243, "total_flos": 3890703607298641920, "step": 1013400 }, { "loss": 2.79, "learning_rate": 1.860356651836591e-06, "epoch": 4.077305880412437, "total_flos": 3891091551054336000, "step": 1013500 }, { "loss": 2.86, "learning_rate": 1.8595455264406603e-06, "epoch": 4.077708179956632, "total_flos": 3891474592533442560, "step": 1013600 }, { "loss": 2.79, "learning_rate": 1.8587344010447297e-06, "epoch": 4.078110479500827, "total_flos": 3891851982850805760, "step": 1013700 }, { "loss": 2.825, "learning_rate": 1.8579232756487992e-06, "epoch": 4.078512779045021, "total_flos": 3892236516788981760, "step": 1013800 }, { "loss": 2.8125, "learning_rate": 1.8571121502528686e-06, "epoch": 4.078915078589216, "total_flos": 3892614082377338880, "step": 1013900 }, { "loss": 2.865, "learning_rate": 1.856301024856938e-06, "epoch": 4.07931737813341, "total_flos": 3892985922446561280, "step": 1014000 }, { "loss": 2.84, "learning_rate": 1.8554898994610074e-06, "epoch": 4.0797196776776055, "total_flos": 3893358341441187840, "step": 1014100 }, { "loss": 2.79, "learning_rate": 1.8546787740650768e-06, "epoch": 4.0801219772218, "total_flos": 3893749673769431040, "step": 1014200 }, { "loss": 2.7975, "learning_rate": 1.8538676486691462e-06, "epoch": 4.080524276765995, "total_flos": 3894120807443435520, "step": 1014300 }, { "loss": 2.8425, "learning_rate": 1.8530565232732156e-06, "epoch": 4.080926576310189, "total_flos": 3894487421250293760, "step": 1014400 }, { "loss": 2.81, "learning_rate": 1.852245397877285e-06, "epoch": 4.081328875854384, "total_flos": 3894872863410892800, "step": 1014500 }, { "loss": 2.805, "learning_rate": 1.8514342724813544e-06, "epoch": 4.081731175398578, "total_flos": 3895256303233167360, "step": 1014600 }, { "loss": 2.775, "learning_rate": 1.8506231470854238e-06, "epoch": 4.082133474942773, "total_flos": 3895647375310540800, "step": 1014700 }, { "loss": 2.8125, "learning_rate": 1.8498120216894932e-06, "epoch": 4.082535774486968, "total_flos": 3896040911804313600, "step": 1014800 }, { "loss": 2.7725, "learning_rate": 1.8490008962935626e-06, "epoch": 4.082938074031162, "total_flos": 3896426093714042880, "step": 1014900 }, { "loss": 2.795, "learning_rate": 1.848189770897632e-06, "epoch": 4.083340373575357, "total_flos": 3896799936121589760, "step": 1015000 }, { "loss": 2.8175, "learning_rate": 1.8473786455017015e-06, "epoch": 4.083742673119551, "total_flos": 3897173656370565120, "step": 1015100 }, { "loss": 2.8175, "learning_rate": 1.8465675201057709e-06, "epoch": 4.084144972663746, "total_flos": 3897550632411033600, "step": 1015200 }, { "loss": 2.825, "learning_rate": 1.8457563947098403e-06, "epoch": 4.08454727220794, "total_flos": 3897937551096975360, "step": 1015300 }, { "loss": 2.7825, "learning_rate": 1.8449452693139097e-06, "epoch": 4.084949571752135, "total_flos": 3898326440253788160, "step": 1015400 }, { "loss": 2.805, "learning_rate": 1.844134143917979e-06, "epoch": 4.0853518712963295, "total_flos": 3898715504681594880, "step": 1015500 }, { "loss": 2.8125, "learning_rate": 1.8433230185220485e-06, "epoch": 4.0857541708405245, "total_flos": 3899091784949329920, "step": 1015600 }, { "loss": 2.785, "learning_rate": 1.842511893126118e-06, "epoch": 4.086156470384719, "total_flos": 3899466965789921280, "step": 1015700 }, { "loss": 2.8, "learning_rate": 1.8417007677301873e-06, "epoch": 4.086558769928914, "total_flos": 3899839703459082240, "step": 1015800 }, { "loss": 2.8075, "learning_rate": 1.8408896423342567e-06, "epoch": 4.086961069473109, "total_flos": 3900220546083901440, "step": 1015900 }, { "loss": 2.805, "learning_rate": 1.8400785169383261e-06, "epoch": 4.087363369017303, "total_flos": 3900593952969584640, "step": 1016000 }, { "loss": 2.8425, "learning_rate": 1.8392673915423955e-06, "epoch": 4.087765668561498, "total_flos": 3900978322259251200, "step": 1016100 }, { "loss": 2.7775, "learning_rate": 1.838456266146465e-06, "epoch": 4.088167968105692, "total_flos": 3901376516712468480, "step": 1016200 }, { "loss": 2.785, "learning_rate": 1.8376451407505343e-06, "epoch": 4.088570267649887, "total_flos": 3901753078476042240, "step": 1016300 }, { "loss": 2.795, "learning_rate": 1.8368340153546037e-06, "epoch": 4.088972567194081, "total_flos": 3902160981880074240, "step": 1016400 }, { "loss": 2.8375, "learning_rate": 1.8360228899586732e-06, "epoch": 4.089374866738276, "total_flos": 3902551135112540160, "step": 1016500 }, { "loss": 2.7975, "learning_rate": 1.8352117645627426e-06, "epoch": 4.08977716628247, "total_flos": 3902946031284326400, "step": 1016600 }, { "loss": 2.83, "learning_rate": 1.834400639166812e-06, "epoch": 4.090179465826665, "total_flos": 3903327293497282560, "step": 1016700 }, { "loss": 2.7625, "learning_rate": 1.8335895137708814e-06, "epoch": 4.09058176537086, "total_flos": 3903717839761674240, "step": 1016800 }, { "loss": 2.8225, "learning_rate": 1.8327783883749508e-06, "epoch": 4.090984064915054, "total_flos": 3904112672198553600, "step": 1016900 }, { "loss": 2.785, "learning_rate": 1.8319672629790202e-06, "epoch": 4.091386364459249, "total_flos": 3904506803551457280, "step": 1017000 }, { "loss": 2.84, "learning_rate": 1.8311561375830896e-06, "epoch": 4.0917886640034435, "total_flos": 3904894094024355840, "step": 1017100 }, { "loss": 2.8175, "learning_rate": 1.830345012187159e-06, "epoch": 4.092190963547639, "total_flos": 3905280725903216640, "step": 1017200 }, { "loss": 2.785, "learning_rate": 1.8295338867912284e-06, "epoch": 4.092593263091833, "total_flos": 3905675223731834880, "step": 1017300 }, { "loss": 2.82, "learning_rate": 1.8287227613952978e-06, "epoch": 4.092995562636028, "total_flos": 3906046044042547200, "step": 1017400 }, { "loss": 2.7925, "learning_rate": 1.8279116359993677e-06, "epoch": 4.093397862180222, "total_flos": 3906416789995868160, "step": 1017500 }, { "loss": 2.8125, "learning_rate": 1.827100510603437e-06, "epoch": 4.093800161724417, "total_flos": 3906802264023920640, "step": 1017600 }, { "loss": 2.805, "learning_rate": 1.8262893852075065e-06, "epoch": 4.094202461268611, "total_flos": 3907175304433889280, "step": 1017700 }, { "loss": 2.79, "learning_rate": 1.8254782598115759e-06, "epoch": 4.094604760812806, "total_flos": 3907539772498882560, "step": 1017800 }, { "loss": 2.8775, "learning_rate": 1.8246671344156453e-06, "epoch": 4.095007060357001, "total_flos": 3907919951218421760, "step": 1017900 }, { "loss": 2.81, "learning_rate": 1.8238560090197147e-06, "epoch": 4.095409359901195, "total_flos": 3908329931318169600, "step": 1018000 }, { "loss": 2.735, "learning_rate": 1.823044883623784e-06, "epoch": 4.09581165944539, "total_flos": 3908720557251194880, "step": 1018100 }, { "loss": 2.805, "learning_rate": 1.8222337582278535e-06, "epoch": 4.096213958989584, "total_flos": 3909095313192407040, "step": 1018200 }, { "loss": 2.76, "learning_rate": 1.821422632831923e-06, "epoch": 4.096616258533779, "total_flos": 3909454347856588800, "step": 1018300 }, { "loss": 2.8425, "learning_rate": 1.8206115074359923e-06, "epoch": 4.097018558077973, "total_flos": 3909839327939112960, "step": 1018400 }, { "loss": 2.8425, "learning_rate": 1.8198003820400617e-06, "epoch": 4.0974208576221685, "total_flos": 3910227802819031040, "step": 1018500 }, { "loss": 2.8625, "learning_rate": 1.8189892566441311e-06, "epoch": 4.097823157166363, "total_flos": 3910611513514659840, "step": 1018600 }, { "loss": 2.815, "learning_rate": 1.8181781312482005e-06, "epoch": 4.098225456710558, "total_flos": 3910986943983636480, "step": 1018700 }, { "loss": 2.77, "learning_rate": 1.81736700585227e-06, "epoch": 4.098627756254752, "total_flos": 3911343232735580160, "step": 1018800 }, { "loss": 2.78, "learning_rate": 1.8165558804563394e-06, "epoch": 4.099030055798947, "total_flos": 3911733651530158080, "step": 1018900 }, { "loss": 2.855, "learning_rate": 1.8157447550604088e-06, "epoch": 4.099432355343142, "total_flos": 3912109899930439680, "step": 1019000 }, { "loss": 2.8425, "learning_rate": 1.8149336296644782e-06, "epoch": 4.099834654887336, "total_flos": 3912476848345559040, "step": 1019100 }, { "loss": 2.705, "learning_rate": 1.8141225042685476e-06, "epoch": 4.100236954431531, "total_flos": 3912865312602992640, "step": 1019200 }, { "loss": 2.8125, "learning_rate": 1.813311378872617e-06, "epoch": 4.100639253975725, "total_flos": 3913242442669486080, "step": 1019300 }, { "loss": 2.8525, "learning_rate": 1.8125002534766864e-06, "epoch": 4.10104155351992, "total_flos": 3913616019514920960, "step": 1019400 }, { "loss": 2.8025, "learning_rate": 1.8116891280807558e-06, "epoch": 4.101443853064114, "total_flos": 3914001190802165760, "step": 1019500 }, { "loss": 2.81, "learning_rate": 1.8108780026848252e-06, "epoch": 4.101846152608309, "total_flos": 3914398769151283200, "step": 1019600 }, { "loss": 2.8075, "learning_rate": 1.8100668772888946e-06, "epoch": 4.102248452152503, "total_flos": 3914785082355609600, "step": 1019700 }, { "loss": 2.8625, "learning_rate": 1.809255751892964e-06, "epoch": 4.102650751696698, "total_flos": 3915160512824586240, "step": 1019800 }, { "loss": 2.81, "learning_rate": 1.8084446264970334e-06, "epoch": 4.103053051240893, "total_flos": 3915562897847930880, "step": 1019900 }, { "loss": 2.815, "learning_rate": 1.8076335011011028e-06, "epoch": 4.1034553507850875, "total_flos": 3915934987545538560, "step": 1020000 }, { "loss": 2.8225, "learning_rate": 1.8068223757051723e-06, "epoch": 4.1038576503292825, "total_flos": 3916323685497630720, "step": 1020100 }, { "loss": 2.8075, "learning_rate": 1.8060112503092417e-06, "epoch": 4.104259949873477, "total_flos": 3916703896084623360, "step": 1020200 }, { "loss": 2.8, "learning_rate": 1.805200124913311e-06, "epoch": 4.104662249417672, "total_flos": 3917074418965770240, "step": 1020300 }, { "loss": 2.835, "learning_rate": 1.8043889995173805e-06, "epoch": 4.105064548961866, "total_flos": 3917477584741724160, "step": 1020400 }, { "loss": 2.8175, "learning_rate": 1.8035778741214499e-06, "epoch": 4.105466848506061, "total_flos": 3917844575646781440, "step": 1020500 }, { "loss": 2.795, "learning_rate": 1.8027667487255193e-06, "epoch": 4.105869148050255, "total_flos": 3918236008888627200, "step": 1020600 }, { "loss": 2.76, "learning_rate": 1.8019556233295887e-06, "epoch": 4.10627144759445, "total_flos": 3918625020204011520, "step": 1020700 }, { "loss": 2.7825, "learning_rate": 1.8011444979336581e-06, "epoch": 4.106673747138644, "total_flos": 3919009208911441920, "step": 1020800 }, { "loss": 2.815, "learning_rate": 1.8003333725377275e-06, "epoch": 4.107076046682839, "total_flos": 3919405905594347520, "step": 1020900 }, { "loss": 2.8225, "learning_rate": 1.799522247141797e-06, "epoch": 4.107478346227034, "total_flos": 3919776683415121920, "step": 1021000 }, { "loss": 2.755, "learning_rate": 1.7987111217458663e-06, "epoch": 4.107880645771228, "total_flos": 3920153181443788800, "step": 1021100 }, { "loss": 2.835, "learning_rate": 1.7978999963499357e-06, "epoch": 4.108282945315423, "total_flos": 3920530587694878720, "step": 1021200 }, { "loss": 2.85, "learning_rate": 1.7970888709540051e-06, "epoch": 4.108685244859617, "total_flos": 3920908795943546880, "step": 1021300 }, { "loss": 2.765, "learning_rate": 1.7962777455580746e-06, "epoch": 4.109087544403812, "total_flos": 3921296309488619520, "step": 1021400 }, { "loss": 2.79, "learning_rate": 1.795466620162144e-06, "epoch": 4.1094898439480065, "total_flos": 3921688087961210880, "step": 1021500 }, { "loss": 2.7975, "learning_rate": 1.7946554947662134e-06, "epoch": 4.1098921434922016, "total_flos": 3922050532442910720, "step": 1021600 }, { "loss": 2.8375, "learning_rate": 1.7938443693702828e-06, "epoch": 4.110294443036396, "total_flos": 3922421166860144640, "step": 1021700 }, { "loss": 2.8125, "learning_rate": 1.7930332439743522e-06, "epoch": 4.110696742580591, "total_flos": 3922807740315340800, "step": 1021800 }, { "loss": 2.785, "learning_rate": 1.7922221185784216e-06, "epoch": 4.111099042124785, "total_flos": 3923183091115683840, "step": 1021900 }, { "loss": 2.81, "learning_rate": 1.791410993182491e-06, "epoch": 4.11150134166898, "total_flos": 3923570121337712640, "step": 1022000 }, { "loss": 2.8, "learning_rate": 1.7905998677865604e-06, "epoch": 4.111903641213175, "total_flos": 3923951643801538560, "step": 1022100 }, { "loss": 2.805, "learning_rate": 1.7897887423906302e-06, "epoch": 4.112305940757369, "total_flos": 3924322118881505280, "step": 1022200 }, { "loss": 2.76, "learning_rate": 1.7889776169946996e-06, "epoch": 4.112708240301564, "total_flos": 3924708591423098880, "step": 1022300 }, { "loss": 2.7875, "learning_rate": 1.788166491598769e-06, "epoch": 4.113110539845758, "total_flos": 3925081015728967680, "step": 1022400 }, { "loss": 2.81, "learning_rate": 1.7873553662028385e-06, "epoch": 4.113512839389953, "total_flos": 3925464960119255040, "step": 1022500 }, { "loss": 2.845, "learning_rate": 1.7865442408069079e-06, "epoch": 4.113915138934147, "total_flos": 3925852091254886400, "step": 1022600 }, { "loss": 2.8075, "learning_rate": 1.7857331154109773e-06, "epoch": 4.114317438478342, "total_flos": 3926224802367836160, "step": 1022700 }, { "loss": 2.7925, "learning_rate": 1.7849219900150467e-06, "epoch": 4.114719738022536, "total_flos": 3926591235592458240, "step": 1022800 }, { "loss": 2.8275, "learning_rate": 1.784110864619116e-06, "epoch": 4.1151220375667314, "total_flos": 3926977570041753600, "step": 1022900 }, { "loss": 2.765, "learning_rate": 1.7832997392231855e-06, "epoch": 4.1155243371109265, "total_flos": 3927357966522224640, "step": 1023000 }, { "loss": 2.7675, "learning_rate": 1.782488613827255e-06, "epoch": 4.115926636655121, "total_flos": 3927739313715056640, "step": 1023100 }, { "loss": 2.8025, "learning_rate": 1.7816774884313243e-06, "epoch": 4.116328936199316, "total_flos": 3928144179088527360, "step": 1023200 }, { "loss": 2.76, "learning_rate": 1.7808663630353937e-06, "epoch": 4.11673123574351, "total_flos": 3928530497604096000, "step": 1023300 }, { "loss": 2.77, "learning_rate": 1.7800552376394631e-06, "epoch": 4.117133535287705, "total_flos": 3928903113114685440, "step": 1023400 }, { "loss": 2.8125, "learning_rate": 1.7792441122435325e-06, "epoch": 4.117535834831899, "total_flos": 3929283281211740160, "step": 1023500 }, { "loss": 2.785, "learning_rate": 1.778432986847602e-06, "epoch": 4.117938134376094, "total_flos": 3929663560844881920, "step": 1023600 }, { "loss": 2.8025, "learning_rate": 1.7776218614516714e-06, "epoch": 4.118340433920288, "total_flos": 3930053660964925440, "step": 1023700 }, { "loss": 2.8275, "learning_rate": 1.7768107360557408e-06, "epoch": 4.118742733464483, "total_flos": 3930460188757217280, "step": 1023800 }, { "loss": 2.7725, "learning_rate": 1.7759996106598102e-06, "epoch": 4.119145033008677, "total_flos": 3930836272508989440, "step": 1023900 }, { "loss": 2.815, "learning_rate": 1.7751884852638796e-06, "epoch": 4.119547332552872, "total_flos": 3931217125756293120, "step": 1024000 }, { "loss": 2.7675, "learning_rate": 1.774377359867949e-06, "epoch": 4.119949632097067, "total_flos": 3931592110080921600, "step": 1024100 }, { "loss": 2.8125, "learning_rate": 1.7735662344720184e-06, "epoch": 4.120351931641261, "total_flos": 3931958729199022080, "step": 1024200 }, { "loss": 2.7775, "learning_rate": 1.7727551090760878e-06, "epoch": 4.120754231185456, "total_flos": 3932349806587637760, "step": 1024300 }, { "loss": 2.8125, "learning_rate": 1.7719439836801572e-06, "epoch": 4.1211565307296505, "total_flos": 3932739375583457280, "step": 1024400 }, { "loss": 2.8225, "learning_rate": 1.7711328582842266e-06, "epoch": 4.1215588302738455, "total_flos": 3933104518176215040, "step": 1024500 }, { "loss": 2.8375, "learning_rate": 1.770321732888296e-06, "epoch": 4.12196112981804, "total_flos": 3933502266485084160, "step": 1024600 }, { "loss": 2.79, "learning_rate": 1.7695106074923654e-06, "epoch": 4.122363429362235, "total_flos": 3933871960812441600, "step": 1024700 }, { "loss": 2.81, "learning_rate": 1.7686994820964348e-06, "epoch": 4.122765728906429, "total_flos": 3934263526835343360, "step": 1024800 }, { "loss": 2.785, "learning_rate": 1.7678883567005042e-06, "epoch": 4.123168028450624, "total_flos": 3934655831120916480, "step": 1024900 }, { "loss": 2.8, "learning_rate": 1.7670772313045737e-06, "epoch": 4.123570327994819, "total_flos": 3935030873869209600, "step": 1025000 }, { "loss": 2.7875, "learning_rate": 1.766266105908643e-06, "epoch": 4.123972627539013, "total_flos": 3935408195140423680, "step": 1025100 }, { "loss": 2.805, "learning_rate": 1.7654549805127125e-06, "epoch": 4.124374927083208, "total_flos": 3935781039034429440, "step": 1025200 }, { "loss": 2.7875, "learning_rate": 1.7646438551167819e-06, "epoch": 4.124777226627402, "total_flos": 3936183992360693760, "step": 1025300 }, { "loss": 2.82, "learning_rate": 1.7638327297208513e-06, "epoch": 4.125179526171597, "total_flos": 3936565212083712000, "step": 1025400 }, { "loss": 2.74, "learning_rate": 1.7630216043249207e-06, "epoch": 4.125581825715791, "total_flos": 3936952454755430400, "step": 1025500 }, { "loss": 2.81, "learning_rate": 1.76221047892899e-06, "epoch": 4.125984125259986, "total_flos": 3937318048803778560, "step": 1025600 }, { "loss": 2.7925, "learning_rate": 1.7613993535330595e-06, "epoch": 4.12638642480418, "total_flos": 3937694881440706560, "step": 1025700 }, { "loss": 2.81, "learning_rate": 1.760588228137129e-06, "epoch": 4.126788724348375, "total_flos": 3938080551984721920, "step": 1025800 }, { "loss": 2.7825, "learning_rate": 1.7597771027411983e-06, "epoch": 4.1271910238925695, "total_flos": 3938462738353827840, "step": 1025900 }, { "loss": 2.8225, "learning_rate": 1.7589659773452677e-06, "epoch": 4.1275933234367645, "total_flos": 3938824789803601920, "step": 1026000 }, { "loss": 2.7675, "learning_rate": 1.7581548519493371e-06, "epoch": 4.12799562298096, "total_flos": 3939201813645250560, "step": 1026100 }, { "loss": 2.7875, "learning_rate": 1.7573437265534065e-06, "epoch": 4.128397922525154, "total_flos": 3939583660094853120, "step": 1026200 }, { "loss": 2.775, "learning_rate": 1.756532601157476e-06, "epoch": 4.128800222069349, "total_flos": 3939952339974942720, "step": 1026300 }, { "loss": 2.815, "learning_rate": 1.7557214757615454e-06, "epoch": 4.129202521613543, "total_flos": 3940344304341012480, "step": 1026400 }, { "loss": 2.815, "learning_rate": 1.7549103503656148e-06, "epoch": 4.129604821157738, "total_flos": 3940730782193848320, "step": 1026500 }, { "loss": 2.76, "learning_rate": 1.7540992249696842e-06, "epoch": 4.130007120701932, "total_flos": 3941091027821260800, "step": 1026600 }, { "loss": 2.825, "learning_rate": 1.7532880995737536e-06, "epoch": 4.130409420246127, "total_flos": 3941471955425955840, "step": 1026700 }, { "loss": 2.7725, "learning_rate": 1.752476974177823e-06, "epoch": 4.130811719790321, "total_flos": 3941854067437670400, "step": 1026800 }, { "loss": 2.8425, "learning_rate": 1.7516658487818924e-06, "epoch": 4.131214019334516, "total_flos": 3942232461579816960, "step": 1026900 }, { "loss": 2.8, "learning_rate": 1.7508547233859622e-06, "epoch": 4.13161631887871, "total_flos": 3942607785823948800, "step": 1027000 }, { "loss": 2.8425, "learning_rate": 1.7500435979900316e-06, "epoch": 4.132018618422905, "total_flos": 3942998640140390400, "step": 1027100 }, { "loss": 2.8125, "learning_rate": 1.749232472594101e-06, "epoch": 4.1324209179671, "total_flos": 3943383933586206720, "step": 1027200 }, { "loss": 2.7475, "learning_rate": 1.7484213471981704e-06, "epoch": 4.1328232175112944, "total_flos": 3943767453077114880, "step": 1027300 }, { "loss": 2.8225, "learning_rate": 1.7476102218022399e-06, "epoch": 4.1332255170554895, "total_flos": 3944150298040258560, "step": 1027400 }, { "loss": 2.75, "learning_rate": 1.7467990964063093e-06, "epoch": 4.133627816599684, "total_flos": 3944516773754818560, "step": 1027500 }, { "loss": 2.885, "learning_rate": 1.7459879710103787e-06, "epoch": 4.134030116143879, "total_flos": 3944887817137704960, "step": 1027600 }, { "loss": 2.845, "learning_rate": 1.745176845614448e-06, "epoch": 4.134432415688073, "total_flos": 3945270943596687360, "step": 1027700 }, { "loss": 2.8, "learning_rate": 1.7443657202185175e-06, "epoch": 4.134834715232268, "total_flos": 3945651446302003200, "step": 1027800 }, { "loss": 2.75, "learning_rate": 1.743554594822587e-06, "epoch": 4.135237014776462, "total_flos": 3946039092628131840, "step": 1027900 }, { "loss": 2.7975, "learning_rate": 1.7427434694266563e-06, "epoch": 4.135639314320657, "total_flos": 3946435475947745280, "step": 1028000 }, { "loss": 2.795, "learning_rate": 1.7419323440307257e-06, "epoch": 4.136041613864852, "total_flos": 3946813801043742720, "step": 1028100 }, { "loss": 2.7875, "learning_rate": 1.7411212186347951e-06, "epoch": 4.136443913409046, "total_flos": 3947193884160921600, "step": 1028200 }, { "loss": 2.855, "learning_rate": 1.7403100932388645e-06, "epoch": 4.136846212953241, "total_flos": 3947586193757736960, "step": 1028300 }, { "loss": 2.8175, "learning_rate": 1.739498967842934e-06, "epoch": 4.137248512497435, "total_flos": 3947958825202053120, "step": 1028400 }, { "loss": 2.775, "learning_rate": 1.7386878424470033e-06, "epoch": 4.13765081204163, "total_flos": 3948346110363709440, "step": 1028500 }, { "loss": 2.765, "learning_rate": 1.7378767170510727e-06, "epoch": 4.138053111585824, "total_flos": 3948708703560192000, "step": 1028600 }, { "loss": 2.7775, "learning_rate": 1.7370655916551422e-06, "epoch": 4.138455411130019, "total_flos": 3949100598880112640, "step": 1028700 }, { "loss": 2.7875, "learning_rate": 1.7362544662592116e-06, "epoch": 4.1388577106742135, "total_flos": 3949494852391587840, "step": 1028800 }, { "loss": 2.7825, "learning_rate": 1.735443340863281e-06, "epoch": 4.1392600102184085, "total_flos": 3949876863489699840, "step": 1028900 }, { "loss": 2.8525, "learning_rate": 1.7346322154673504e-06, "epoch": 4.139662309762603, "total_flos": 3950261636433776640, "step": 1029000 }, { "loss": 2.8125, "learning_rate": 1.7338210900714198e-06, "epoch": 4.140064609306798, "total_flos": 3950638283177226240, "step": 1029100 }, { "loss": 2.7375, "learning_rate": 1.7330099646754892e-06, "epoch": 4.140466908850993, "total_flos": 3951027156400312320, "step": 1029200 }, { "loss": 2.7725, "learning_rate": 1.7321988392795586e-06, "epoch": 4.140869208395187, "total_flos": 3951410851162214400, "step": 1029300 }, { "loss": 2.8425, "learning_rate": 1.731387713883628e-06, "epoch": 4.141271507939382, "total_flos": 3951803197937725440, "step": 1029400 }, { "loss": 2.8175, "learning_rate": 1.7305765884876974e-06, "epoch": 4.141673807483576, "total_flos": 3952178336288378880, "step": 1029500 }, { "loss": 2.745, "learning_rate": 1.7297654630917668e-06, "epoch": 4.142076107027771, "total_flos": 3952569758907740160, "step": 1029600 }, { "loss": 2.8375, "learning_rate": 1.7289543376958362e-06, "epoch": 4.142478406571965, "total_flos": 3952950718379888640, "step": 1029700 }, { "loss": 2.845, "learning_rate": 1.7281432122999056e-06, "epoch": 4.14288070611616, "total_flos": 3953336229586636800, "step": 1029800 }, { "loss": 2.815, "learning_rate": 1.727332086903975e-06, "epoch": 4.143283005660354, "total_flos": 3953724391103262720, "step": 1029900 }, { "loss": 2.8425, "learning_rate": 1.7265209615080445e-06, "epoch": 4.143685305204549, "total_flos": 3954098950528512000, "step": 1030000 }, { "loss": 2.79, "learning_rate": 1.7257098361121139e-06, "epoch": 4.144087604748744, "total_flos": 3954486618099609600, "step": 1030100 }, { "loss": 2.8225, "learning_rate": 1.7248987107161833e-06, "epoch": 4.144489904292938, "total_flos": 3954859122074112000, "step": 1030200 }, { "loss": 2.8175, "learning_rate": 1.7240875853202527e-06, "epoch": 4.144892203837133, "total_flos": 3955236783264829440, "step": 1030300 }, { "loss": 2.775, "learning_rate": 1.723276459924322e-06, "epoch": 4.1452945033813275, "total_flos": 3955629698343260160, "step": 1030400 }, { "loss": 2.79, "learning_rate": 1.7224653345283915e-06, "epoch": 4.145696802925523, "total_flos": 3955999111174778880, "step": 1030500 }, { "loss": 2.81, "learning_rate": 1.721654209132461e-06, "epoch": 4.146099102469717, "total_flos": 3956369023263068160, "step": 1030600 }, { "loss": 2.85, "learning_rate": 1.7208430837365303e-06, "epoch": 4.146501402013912, "total_flos": 3956760461816156160, "step": 1030700 }, { "loss": 2.7775, "learning_rate": 1.7200319583405997e-06, "epoch": 4.146903701558106, "total_flos": 3957151459536138240, "step": 1030800 }, { "loss": 2.81, "learning_rate": 1.7192208329446691e-06, "epoch": 4.147306001102301, "total_flos": 3957527686691450880, "step": 1030900 }, { "loss": 2.7825, "learning_rate": 1.7184097075487385e-06, "epoch": 4.147708300646495, "total_flos": 3957914334504038400, "step": 1031000 }, { "loss": 2.8425, "learning_rate": 1.717598582152808e-06, "epoch": 4.14811060019069, "total_flos": 3958298602880102400, "step": 1031100 }, { "loss": 2.8125, "learning_rate": 1.7167874567568773e-06, "epoch": 4.148512899734885, "total_flos": 3958680173145108480, "step": 1031200 }, { "loss": 2.8175, "learning_rate": 1.7159763313609468e-06, "epoch": 4.148915199279079, "total_flos": 3959059719826821120, "step": 1031300 }, { "loss": 2.8025, "learning_rate": 1.7151652059650162e-06, "epoch": 4.149317498823274, "total_flos": 3959421585383116800, "step": 1031400 }, { "loss": 2.815, "learning_rate": 1.7143540805690856e-06, "epoch": 4.149719798367468, "total_flos": 3959801450739363840, "step": 1031500 }, { "loss": 2.795, "learning_rate": 1.713542955173155e-06, "epoch": 4.150122097911663, "total_flos": 3960176057965793280, "step": 1031600 }, { "loss": 2.7875, "learning_rate": 1.7127318297772246e-06, "epoch": 4.150524397455857, "total_flos": 3960556911213096960, "step": 1031700 }, { "loss": 2.8275, "learning_rate": 1.711920704381294e-06, "epoch": 4.1509266970000525, "total_flos": 3960939878334812160, "step": 1031800 }, { "loss": 2.7925, "learning_rate": 1.7111095789853634e-06, "epoch": 4.151328996544247, "total_flos": 3961323870526279680, "step": 1031900 }, { "loss": 2.8525, "learning_rate": 1.7102984535894328e-06, "epoch": 4.151731296088442, "total_flos": 3961707480308305920, "step": 1032000 }, { "loss": 2.795, "learning_rate": 1.7094873281935022e-06, "epoch": 4.152133595632636, "total_flos": 3962092672840519680, "step": 1032100 }, { "loss": 2.79, "learning_rate": 1.7086762027975716e-06, "epoch": 4.152535895176831, "total_flos": 3962482220591370240, "step": 1032200 }, { "loss": 2.8475, "learning_rate": 1.707865077401641e-06, "epoch": 4.152938194721026, "total_flos": 3962876925558435840, "step": 1032300 }, { "loss": 2.8275, "learning_rate": 1.7070539520057107e-06, "epoch": 4.15334049426522, "total_flos": 3963255181608284160, "step": 1032400 }, { "loss": 2.785, "learning_rate": 1.70624282660978e-06, "epoch": 4.153742793809415, "total_flos": 3963647023815782400, "step": 1032500 }, { "loss": 2.75, "learning_rate": 1.7054317012138495e-06, "epoch": 4.154145093353609, "total_flos": 3964038042780733440, "step": 1032600 }, { "loss": 2.7575, "learning_rate": 1.7046205758179189e-06, "epoch": 4.154547392897804, "total_flos": 3964428934275870720, "step": 1032700 }, { "loss": 2.7925, "learning_rate": 1.7038094504219883e-06, "epoch": 4.154949692441998, "total_flos": 3964812262562058240, "step": 1032800 }, { "loss": 2.825, "learning_rate": 1.7029983250260577e-06, "epoch": 4.155351991986193, "total_flos": 3965195245617500160, "step": 1032900 }, { "loss": 2.775, "learning_rate": 1.7021871996301271e-06, "epoch": 4.155754291530387, "total_flos": 3965568344451133440, "step": 1033000 }, { "loss": 2.805, "learning_rate": 1.7013760742341965e-06, "epoch": 4.156156591074582, "total_flos": 3965934310286438400, "step": 1033100 }, { "loss": 2.7775, "learning_rate": 1.700564948838266e-06, "epoch": 4.156558890618777, "total_flos": 3966310415283179520, "step": 1033200 }, { "loss": 2.8325, "learning_rate": 1.6997538234423353e-06, "epoch": 4.1569611901629715, "total_flos": 3966701205864714240, "step": 1033300 }, { "loss": 2.775, "learning_rate": 1.6989426980464047e-06, "epoch": 4.1573634897071665, "total_flos": 3967100929955696640, "step": 1033400 }, { "loss": 2.865, "learning_rate": 1.6981315726504741e-06, "epoch": 4.157765789251361, "total_flos": 3967479355965296640, "step": 1033500 }, { "loss": 2.7675, "learning_rate": 1.6973204472545436e-06, "epoch": 4.158168088795556, "total_flos": 3967864495385088000, "step": 1033600 }, { "loss": 2.775, "learning_rate": 1.696509321858613e-06, "epoch": 4.15857038833975, "total_flos": 3968236558526484480, "step": 1033700 }, { "loss": 2.8175, "learning_rate": 1.6956981964626824e-06, "epoch": 4.158972687883945, "total_flos": 3968630588965785600, "step": 1033800 }, { "loss": 2.7925, "learning_rate": 1.6948870710667518e-06, "epoch": 4.159374987428139, "total_flos": 3969034779811491840, "step": 1033900 }, { "loss": 2.7525, "learning_rate": 1.6940759456708212e-06, "epoch": 4.159777286972334, "total_flos": 3969421220485632000, "step": 1034000 }, { "loss": 2.77, "learning_rate": 1.6932648202748906e-06, "epoch": 4.160179586516528, "total_flos": 3969811724260085760, "step": 1034100 }, { "loss": 2.79, "learning_rate": 1.69245369487896e-06, "epoch": 4.160581886060723, "total_flos": 3970200995826339840, "step": 1034200 }, { "loss": 2.8125, "learning_rate": 1.6916425694830294e-06, "epoch": 4.160984185604918, "total_flos": 3970581663180165120, "step": 1034300 }, { "loss": 2.835, "learning_rate": 1.6908314440870988e-06, "epoch": 4.161386485149112, "total_flos": 3970969946855362560, "step": 1034400 }, { "loss": 2.815, "learning_rate": 1.6900203186911682e-06, "epoch": 4.161788784693307, "total_flos": 3971350948817448960, "step": 1034500 }, { "loss": 2.785, "learning_rate": 1.6892091932952376e-06, "epoch": 4.162191084237501, "total_flos": 3971739253737615360, "step": 1034600 }, { "loss": 2.77, "learning_rate": 1.688398067899307e-06, "epoch": 4.162593383781696, "total_flos": 3972115449025474560, "step": 1034700 }, { "loss": 2.8225, "learning_rate": 1.6875869425033764e-06, "epoch": 4.1629956833258905, "total_flos": 3972497661950791680, "step": 1034800 }, { "loss": 2.835, "learning_rate": 1.6867758171074459e-06, "epoch": 4.163397982870086, "total_flos": 3972872710010327040, "step": 1034900 }, { "loss": 2.7525, "learning_rate": 1.6859646917115153e-06, "epoch": 4.16380028241428, "total_flos": 3973247253501849600, "step": 1035000 }, { "loss": 2.85, "learning_rate": 1.6851535663155847e-06, "epoch": 4.164202581958475, "total_flos": 3973635468130897920, "step": 1035100 }, { "loss": 2.7975, "learning_rate": 1.684342440919654e-06, "epoch": 4.164604881502669, "total_flos": 3974024585671127040, "step": 1035200 }, { "loss": 2.76, "learning_rate": 1.6835313155237235e-06, "epoch": 4.165007181046864, "total_flos": 3974418531130552320, "step": 1035300 }, { "loss": 2.82, "learning_rate": 1.6827201901277929e-06, "epoch": 4.165409480591059, "total_flos": 3974812476589977600, "step": 1035400 }, { "loss": 2.8, "learning_rate": 1.6819090647318623e-06, "epoch": 4.165811780135253, "total_flos": 3975191975470510080, "step": 1035500 }, { "loss": 2.85, "learning_rate": 1.6810979393359317e-06, "epoch": 4.166214079679448, "total_flos": 3975580391926763520, "step": 1035600 }, { "loss": 2.77, "learning_rate": 1.6802868139400011e-06, "epoch": 4.166616379223642, "total_flos": 3975963104108851200, "step": 1035700 }, { "loss": 2.825, "learning_rate": 1.6794756885440705e-06, "epoch": 4.167018678767837, "total_flos": 3976359694566912000, "step": 1035800 }, { "loss": 2.855, "learning_rate": 1.67866456314814e-06, "epoch": 4.167420978312031, "total_flos": 3976728661254082560, "step": 1035900 }, { "loss": 2.7825, "learning_rate": 1.6778534377522093e-06, "epoch": 4.167823277856226, "total_flos": 3977118564858163200, "step": 1036000 }, { "loss": 2.7825, "learning_rate": 1.6770423123562787e-06, "epoch": 4.16822557740042, "total_flos": 3977509297016033280, "step": 1036100 }, { "loss": 2.805, "learning_rate": 1.6762311869603482e-06, "epoch": 4.1686278769446155, "total_flos": 3977893007711662080, "step": 1036200 }, { "loss": 2.8525, "learning_rate": 1.6754200615644176e-06, "epoch": 4.1690301764888105, "total_flos": 3978269176443310080, "step": 1036300 }, { "loss": 2.7825, "learning_rate": 1.6746089361684872e-06, "epoch": 4.169432476033005, "total_flos": 3978660216653230080, "step": 1036400 }, { "loss": 2.7925, "learning_rate": 1.6737978107725566e-06, "epoch": 4.1698347755772, "total_flos": 3979036565967114240, "step": 1036500 }, { "loss": 2.7975, "learning_rate": 1.672986685376626e-06, "epoch": 4.170237075121394, "total_flos": 3979423489964298240, "step": 1036600 }, { "loss": 2.7025, "learning_rate": 1.6721755599806954e-06, "epoch": 4.170639374665589, "total_flos": 3979800710321909760, "step": 1036700 }, { "loss": 2.8225, "learning_rate": 1.6713644345847648e-06, "epoch": 4.171041674209783, "total_flos": 3980193524486737920, "step": 1036800 }, { "loss": 2.8175, "learning_rate": 1.6705533091888342e-06, "epoch": 4.171443973753978, "total_flos": 3980586694504796160, "step": 1036900 }, { "loss": 2.8375, "learning_rate": 1.6697421837929036e-06, "epoch": 4.171846273298172, "total_flos": 3980980108839997440, "step": 1037000 }, { "loss": 2.8025, "learning_rate": 1.668931058396973e-06, "epoch": 4.172248572842367, "total_flos": 3981368928950661120, "step": 1037100 }, { "loss": 2.7725, "learning_rate": 1.6681199330010424e-06, "epoch": 4.172650872386561, "total_flos": 3981761589089464320, "step": 1037200 }, { "loss": 2.7975, "learning_rate": 1.6673088076051118e-06, "epoch": 4.173053171930756, "total_flos": 3982147774823976960, "step": 1037300 }, { "loss": 2.82, "learning_rate": 1.6664976822091813e-06, "epoch": 4.173455471474951, "total_flos": 3982510622960087040, "step": 1037400 }, { "loss": 2.77, "learning_rate": 1.6656865568132507e-06, "epoch": 4.173857771019145, "total_flos": 3982897355752550400, "step": 1037500 }, { "loss": 2.7975, "learning_rate": 1.66487543141732e-06, "epoch": 4.17426007056334, "total_flos": 3983287631143587840, "step": 1037600 }, { "loss": 2.7775, "learning_rate": 1.6640643060213895e-06, "epoch": 4.1746623701075345, "total_flos": 3983681077346242560, "step": 1037700 }, { "loss": 2.8725, "learning_rate": 1.6632531806254589e-06, "epoch": 4.1750646696517295, "total_flos": 3984061048927334400, "step": 1037800 }, { "loss": 2.825, "learning_rate": 1.6624420552295283e-06, "epoch": 4.175466969195924, "total_flos": 3984452121004707840, "step": 1037900 }, { "loss": 2.82, "learning_rate": 1.6616309298335977e-06, "epoch": 4.175869268740119, "total_flos": 3984846130199040000, "step": 1038000 }, { "loss": 2.8075, "learning_rate": 1.6608198044376673e-06, "epoch": 4.176271568284313, "total_flos": 3985238891251445760, "step": 1038100 }, { "loss": 2.72, "learning_rate": 1.6600086790417367e-06, "epoch": 4.176673867828508, "total_flos": 3985633362523852800, "step": 1038200 }, { "loss": 2.79, "learning_rate": 1.6591975536458061e-06, "epoch": 4.177076167372702, "total_flos": 3985993124828221440, "step": 1038300 }, { "loss": 2.805, "learning_rate": 1.6583864282498755e-06, "epoch": 4.177478466916897, "total_flos": 3986378561677578240, "step": 1038400 }, { "loss": 2.7775, "learning_rate": 1.657575302853945e-06, "epoch": 4.177880766461092, "total_flos": 3986760142565068800, "step": 1038500 }, { "loss": 2.7975, "learning_rate": 1.6567641774580144e-06, "epoch": 4.178283066005286, "total_flos": 3987141516314112000, "step": 1038600 }, { "loss": 2.7775, "learning_rate": 1.6559530520620838e-06, "epoch": 4.178685365549481, "total_flos": 3987524929580175360, "step": 1038700 }, { "loss": 2.825, "learning_rate": 1.6551419266661532e-06, "epoch": 4.179087665093675, "total_flos": 3987915672360529920, "step": 1038800 }, { "loss": 2.73, "learning_rate": 1.6543308012702226e-06, "epoch": 4.17948996463787, "total_flos": 3988295558961745920, "step": 1038900 }, { "loss": 2.795, "learning_rate": 1.653519675874292e-06, "epoch": 4.179892264182064, "total_flos": 3988675567721533440, "step": 1039000 }, { "loss": 2.7525, "learning_rate": 1.6527085504783614e-06, "epoch": 4.180294563726259, "total_flos": 3989055486190202880, "step": 1039100 }, { "loss": 2.805, "learning_rate": 1.6518974250824308e-06, "epoch": 4.1806968632704535, "total_flos": 3989441077065584640, "step": 1039200 }, { "loss": 2.8175, "learning_rate": 1.6510862996865002e-06, "epoch": 4.1810991628146486, "total_flos": 3989826567027363840, "step": 1039300 }, { "loss": 2.86, "learning_rate": 1.6502751742905696e-06, "epoch": 4.181501462358844, "total_flos": 3990212423464857600, "step": 1039400 }, { "loss": 2.7775, "learning_rate": 1.649464048894639e-06, "epoch": 4.181903761903038, "total_flos": 3990595778307256320, "step": 1039500 }, { "loss": 2.82, "learning_rate": 1.6486529234987084e-06, "epoch": 4.182306061447233, "total_flos": 3990981448851271680, "step": 1039600 }, { "loss": 2.8025, "learning_rate": 1.6478417981027778e-06, "epoch": 4.182708360991427, "total_flos": 3991372632464732160, "step": 1039700 }, { "loss": 2.8, "learning_rate": 1.6470306727068472e-06, "epoch": 4.183110660535622, "total_flos": 3991754054014955520, "step": 1039800 }, { "loss": 2.8175, "learning_rate": 1.6462195473109167e-06, "epoch": 4.183512960079816, "total_flos": 3992133430736916480, "step": 1039900 }, { "loss": 2.835, "learning_rate": 1.645408421914986e-06, "epoch": 4.183915259624011, "total_flos": 3992514289295462400, "step": 1040000 }, { "loss": 2.7925, "learning_rate": 1.6445972965190555e-06, "epoch": 4.184317559168205, "total_flos": 3992891005085061120, "step": 1040100 }, { "loss": 2.7575, "learning_rate": 1.6437861711231249e-06, "epoch": 4.1847198587124, "total_flos": 3993265532642856960, "step": 1040200 }, { "loss": 2.85, "learning_rate": 1.6429750457271943e-06, "epoch": 4.185122158256594, "total_flos": 3993647522496000000, "step": 1040300 }, { "loss": 2.7925, "learning_rate": 1.6421639203312637e-06, "epoch": 4.185524457800789, "total_flos": 3994048388504064000, "step": 1040400 }, { "loss": 2.8075, "learning_rate": 1.641352794935333e-06, "epoch": 4.185926757344984, "total_flos": 3994413504540610560, "step": 1040500 }, { "loss": 2.8375, "learning_rate": 1.6405416695394025e-06, "epoch": 4.1863290568891784, "total_flos": 3994789843232010240, "step": 1040600 }, { "loss": 2.79, "learning_rate": 1.639730544143472e-06, "epoch": 4.1867313564333735, "total_flos": 3995167525667696640, "step": 1040700 }, { "loss": 2.7375, "learning_rate": 1.6389194187475413e-06, "epoch": 4.187133655977568, "total_flos": 3995567308182343680, "step": 1040800 }, { "loss": 2.79, "learning_rate": 1.6381082933516107e-06, "epoch": 4.187535955521763, "total_flos": 3995950960454307840, "step": 1040900 }, { "loss": 2.805, "learning_rate": 1.6372971679556801e-06, "epoch": 4.187938255065957, "total_flos": 3996317191851724800, "step": 1041000 }, { "loss": 2.755, "learning_rate": 1.6364860425597495e-06, "epoch": 4.188340554610152, "total_flos": 3996707738116116480, "step": 1041100 }, { "loss": 2.8375, "learning_rate": 1.6356749171638192e-06, "epoch": 4.188742854154346, "total_flos": 3997094800205598720, "step": 1041200 }, { "loss": 2.8625, "learning_rate": 1.6348637917678886e-06, "epoch": 4.189145153698541, "total_flos": 3997475733121536000, "step": 1041300 }, { "loss": 2.8, "learning_rate": 1.634052666371958e-06, "epoch": 4.189547453242735, "total_flos": 3997852772896911360, "step": 1041400 }, { "loss": 2.7775, "learning_rate": 1.6332415409760274e-06, "epoch": 4.18994975278693, "total_flos": 3998232330201108480, "step": 1041500 }, { "loss": 2.7875, "learning_rate": 1.6324304155800968e-06, "epoch": 4.190352052331125, "total_flos": 3998615642553569280, "step": 1041600 }, { "loss": 2.8075, "learning_rate": 1.6316192901841662e-06, "epoch": 4.190754351875319, "total_flos": 3999007883104235520, "step": 1041700 }, { "loss": 2.7675, "learning_rate": 1.6308081647882356e-06, "epoch": 4.191156651419514, "total_flos": 3999391939030609920, "step": 1041800 }, { "loss": 2.8175, "learning_rate": 1.629997039392305e-06, "epoch": 4.191558950963708, "total_flos": 3999775182336921600, "step": 1041900 }, { "loss": 2.7825, "learning_rate": 1.6291859139963744e-06, "epoch": 4.191961250507903, "total_flos": 4000153029421117440, "step": 1042000 }, { "loss": 2.745, "learning_rate": 1.6283747886004438e-06, "epoch": 4.1923635500520975, "total_flos": 4000531933442519040, "step": 1042100 }, { "loss": 2.79, "learning_rate": 1.6275636632045132e-06, "epoch": 4.1927658495962925, "total_flos": 4000909674301870080, "step": 1042200 }, { "loss": 2.8125, "learning_rate": 1.6267525378085827e-06, "epoch": 4.193168149140487, "total_flos": 4001290984316006400, "step": 1042300 }, { "loss": 2.825, "learning_rate": 1.625941412412652e-06, "epoch": 4.193570448684682, "total_flos": 4001670084853370880, "step": 1042400 }, { "loss": 2.745, "learning_rate": 1.6251302870167215e-06, "epoch": 4.193972748228877, "total_flos": 4002044060041973760, "step": 1042500 }, { "loss": 2.7675, "learning_rate": 1.6243191616207909e-06, "epoch": 4.194375047773071, "total_flos": 4002408501550755840, "step": 1042600 }, { "loss": 2.74, "learning_rate": 1.6235080362248603e-06, "epoch": 4.194777347317266, "total_flos": 4002781249842401280, "step": 1042700 }, { "loss": 2.8175, "learning_rate": 1.6226969108289297e-06, "epoch": 4.19517964686146, "total_flos": 4003159867056721920, "step": 1042800 }, { "loss": 2.855, "learning_rate": 1.621885785432999e-06, "epoch": 4.195581946405655, "total_flos": 4003548543763845120, "step": 1042900 }, { "loss": 2.7375, "learning_rate": 1.6210746600370685e-06, "epoch": 4.195984245949849, "total_flos": 4003951459911413760, "step": 1043000 }, { "loss": 2.8075, "learning_rate": 1.620263534641138e-06, "epoch": 4.196386545494044, "total_flos": 4004342813484625920, "step": 1043100 }, { "loss": 2.885, "learning_rate": 1.6194524092452073e-06, "epoch": 4.196788845038238, "total_flos": 4004717595982049280, "step": 1043200 }, { "loss": 2.79, "learning_rate": 1.6186412838492767e-06, "epoch": 4.197191144582433, "total_flos": 4005110256120852480, "step": 1043300 }, { "loss": 2.7825, "learning_rate": 1.6178301584533461e-06, "epoch": 4.197593444126627, "total_flos": 4005485803437158400, "step": 1043400 }, { "loss": 2.7925, "learning_rate": 1.6170190330574155e-06, "epoch": 4.197995743670822, "total_flos": 4005880896124907520, "step": 1043500 }, { "loss": 2.8125, "learning_rate": 1.616207907661485e-06, "epoch": 4.198398043215017, "total_flos": 4006258796321525760, "step": 1043600 }, { "loss": 2.8275, "learning_rate": 1.6153967822655546e-06, "epoch": 4.1988003427592115, "total_flos": 4006647531452313600, "step": 1043700 }, { "loss": 2.78, "learning_rate": 1.614585656869624e-06, "epoch": 4.199202642303407, "total_flos": 4007037594393661440, "step": 1043800 }, { "loss": 2.8225, "learning_rate": 1.6137745314736934e-06, "epoch": 4.199604941847601, "total_flos": 4007441673703280640, "step": 1043900 }, { "loss": 2.7775, "learning_rate": 1.6129634060777628e-06, "epoch": 4.200007241391796, "total_flos": 4007827737279221760, "step": 1044000 }, { "loss": 2.7875, "learning_rate": 1.6121522806818322e-06, "epoch": 4.20040954093599, "total_flos": 4008229718648156160, "step": 1044100 }, { "loss": 2.7825, "learning_rate": 1.6113411552859016e-06, "epoch": 4.200811840480185, "total_flos": 4008605903313530880, "step": 1044200 }, { "loss": 2.7925, "learning_rate": 1.610530029889971e-06, "epoch": 4.201214140024379, "total_flos": 4009005420266065920, "step": 1044300 }, { "loss": 2.815, "learning_rate": 1.6097189044940404e-06, "epoch": 4.201616439568574, "total_flos": 4009410290950778880, "step": 1044400 }, { "loss": 2.7725, "learning_rate": 1.6089077790981098e-06, "epoch": 4.202018739112768, "total_flos": 4009791770924666880, "step": 1044500 }, { "loss": 2.825, "learning_rate": 1.6080966537021792e-06, "epoch": 4.202421038656963, "total_flos": 4010169134685818880, "step": 1044600 }, { "loss": 2.8275, "learning_rate": 1.6072855283062486e-06, "epoch": 4.202823338201158, "total_flos": 4010558756794060800, "step": 1044700 }, { "loss": 2.8125, "learning_rate": 1.606474402910318e-06, "epoch": 4.203225637745352, "total_flos": 4010962432449269760, "step": 1044800 }, { "loss": 2.825, "learning_rate": 1.6056632775143875e-06, "epoch": 4.203627937289547, "total_flos": 4011358412114472960, "step": 1044900 }, { "loss": 2.79, "learning_rate": 1.6048521521184569e-06, "epoch": 4.204030236833741, "total_flos": 4011730294673633280, "step": 1045000 }, { "loss": 2.8575, "learning_rate": 1.6040410267225263e-06, "epoch": 4.2044325363779365, "total_flos": 4012112454486528000, "step": 1045100 }, { "loss": 2.81, "learning_rate": 1.6032299013265957e-06, "epoch": 4.204834835922131, "total_flos": 4012496749418803200, "step": 1045200 }, { "loss": 2.795, "learning_rate": 1.602418775930665e-06, "epoch": 4.205237135466326, "total_flos": 4012882143778222080, "step": 1045300 }, { "loss": 2.84, "learning_rate": 1.6016076505347345e-06, "epoch": 4.20563943501052, "total_flos": 4013271877422551040, "step": 1045400 }, { "loss": 2.77, "learning_rate": 1.600796525138804e-06, "epoch": 4.206041734554715, "total_flos": 4013647647811031040, "step": 1045500 }, { "loss": 2.78, "learning_rate": 1.5999853997428733e-06, "epoch": 4.20644403409891, "total_flos": 4014034635543121920, "step": 1045600 }, { "loss": 2.8, "learning_rate": 1.5991742743469427e-06, "epoch": 4.206846333643104, "total_flos": 4014420507914342400, "step": 1045700 }, { "loss": 2.8125, "learning_rate": 1.5983631489510121e-06, "epoch": 4.207248633187299, "total_flos": 4014801584233820160, "step": 1045800 }, { "loss": 2.84, "learning_rate": 1.5975520235550817e-06, "epoch": 4.207650932731493, "total_flos": 4015179797793730560, "step": 1045900 }, { "loss": 2.7425, "learning_rate": 1.5967408981591512e-06, "epoch": 4.208053232275688, "total_flos": 4015564650406440960, "step": 1046000 }, { "loss": 2.79, "learning_rate": 1.5959297727632206e-06, "epoch": 4.208455531819882, "total_flos": 4015951903700643840, "step": 1046100 }, { "loss": 2.79, "learning_rate": 1.59511864736729e-06, "epoch": 4.208857831364077, "total_flos": 4016358665187594240, "step": 1046200 }, { "loss": 2.8025, "learning_rate": 1.5943075219713594e-06, "epoch": 4.209260130908271, "total_flos": 4016747612768071680, "step": 1046300 }, { "loss": 2.79, "learning_rate": 1.5934963965754288e-06, "epoch": 4.209662430452466, "total_flos": 4017130548022333440, "step": 1046400 }, { "loss": 2.7925, "learning_rate": 1.5926852711794982e-06, "epoch": 4.210064729996661, "total_flos": 4017510296531251200, "step": 1046500 }, { "loss": 2.8175, "learning_rate": 1.5918741457835676e-06, "epoch": 4.2104670295408555, "total_flos": 4017894968561725440, "step": 1046600 }, { "loss": 2.8325, "learning_rate": 1.591063020387637e-06, "epoch": 4.2108693290850505, "total_flos": 4018273102453002240, "step": 1046700 }, { "loss": 2.7825, "learning_rate": 1.5902518949917064e-06, "epoch": 4.211271628629245, "total_flos": 4018644379530547200, "step": 1046800 }, { "loss": 2.81, "learning_rate": 1.5894407695957758e-06, "epoch": 4.21167392817344, "total_flos": 4019029747333754880, "step": 1046900 }, { "loss": 2.7925, "learning_rate": 1.5886296441998452e-06, "epoch": 4.212076227717634, "total_flos": 4019426603353927680, "step": 1047000 }, { "loss": 2.79, "learning_rate": 1.5878185188039146e-06, "epoch": 4.212478527261829, "total_flos": 4019815577490616320, "step": 1047100 }, { "loss": 2.795, "learning_rate": 1.587007393407984e-06, "epoch": 4.212880826806023, "total_flos": 4020211265037496320, "step": 1047200 }, { "loss": 2.7675, "learning_rate": 1.5861962680120535e-06, "epoch": 4.213283126350218, "total_flos": 4020593775392378880, "step": 1047300 }, { "loss": 2.795, "learning_rate": 1.5853851426161229e-06, "epoch": 4.213685425894412, "total_flos": 4020985367971491840, "step": 1047400 }, { "loss": 2.81, "learning_rate": 1.5845740172201923e-06, "epoch": 4.214087725438607, "total_flos": 4021365647604633600, "step": 1047500 }, { "loss": 2.795, "learning_rate": 1.5837628918242617e-06, "epoch": 4.214490024982802, "total_flos": 4021750994162872320, "step": 1047600 }, { "loss": 2.77, "learning_rate": 1.582951766428331e-06, "epoch": 4.214892324526996, "total_flos": 4022125367694643200, "step": 1047700 }, { "loss": 2.8075, "learning_rate": 1.5821406410324005e-06, "epoch": 4.215294624071191, "total_flos": 4022521910351523840, "step": 1047800 }, { "loss": 2.8075, "learning_rate": 1.58132951563647e-06, "epoch": 4.215696923615385, "total_flos": 4022914958211010560, "step": 1047900 }, { "loss": 2.835, "learning_rate": 1.5805183902405393e-06, "epoch": 4.21609922315958, "total_flos": 4023295901749432320, "step": 1048000 }, { "loss": 2.8375, "learning_rate": 1.5797072648446087e-06, "epoch": 4.2165015227037745, "total_flos": 4023659450969518080, "step": 1048100 }, { "loss": 2.775, "learning_rate": 1.5788961394486781e-06, "epoch": 4.21690382224797, "total_flos": 4024039427861852160, "step": 1048200 }, { "loss": 2.76, "learning_rate": 1.5780850140527475e-06, "epoch": 4.217306121792164, "total_flos": 4024422389672325120, "step": 1048300 }, { "loss": 2.75, "learning_rate": 1.577273888656817e-06, "epoch": 4.217708421336359, "total_flos": 4024814354038394880, "step": 1048400 }, { "loss": 2.8225, "learning_rate": 1.5764627632608863e-06, "epoch": 4.218110720880553, "total_flos": 4025196455427624960, "step": 1048500 }, { "loss": 2.7925, "learning_rate": 1.5756516378649558e-06, "epoch": 4.218513020424748, "total_flos": 4025589301459906560, "step": 1048600 }, { "loss": 2.815, "learning_rate": 1.5748405124690252e-06, "epoch": 4.218915319968943, "total_flos": 4025964848776212480, "step": 1048700 }, { "loss": 2.815, "learning_rate": 1.5740293870730946e-06, "epoch": 4.219317619513137, "total_flos": 4026358698633277440, "step": 1048800 }, { "loss": 2.8275, "learning_rate": 1.573218261677164e-06, "epoch": 4.219719919057332, "total_flos": 4026759352191651840, "step": 1048900 }, { "loss": 2.795, "learning_rate": 1.5724071362812334e-06, "epoch": 4.220122218601526, "total_flos": 4027165635666800640, "step": 1049000 }, { "loss": 2.75, "learning_rate": 1.5715960108853028e-06, "epoch": 4.220524518145721, "total_flos": 4027546754476216320, "step": 1049100 }, { "loss": 2.8025, "learning_rate": 1.5707848854893722e-06, "epoch": 4.220926817689915, "total_flos": 4027929865001472000, "step": 1049200 }, { "loss": 2.7525, "learning_rate": 1.5699737600934416e-06, "epoch": 4.22132911723411, "total_flos": 4028318833826918400, "step": 1049300 }, { "loss": 2.7925, "learning_rate": 1.5691626346975112e-06, "epoch": 4.221731416778304, "total_flos": 4028703553658572800, "step": 1049400 }, { "loss": 2.785, "learning_rate": 1.5683515093015806e-06, "epoch": 4.2221337163224995, "total_flos": 4029096787411537920, "step": 1049500 }, { "loss": 2.755, "learning_rate": 1.56754038390565e-06, "epoch": 4.2225360158666945, "total_flos": 4029494052397363200, "step": 1049600 }, { "loss": 2.85, "learning_rate": 1.5667292585097194e-06, "epoch": 4.222938315410889, "total_flos": 4029868250658140160, "step": 1049700 }, { "loss": 2.815, "learning_rate": 1.5659181331137889e-06, "epoch": 4.223340614955084, "total_flos": 4030247229036933120, "step": 1049800 }, { "loss": 2.825, "learning_rate": 1.5651070077178583e-06, "epoch": 4.223742914499278, "total_flos": 4030637541606666240, "step": 1049900 }, { "loss": 2.7975, "learning_rate": 1.5642958823219277e-06, "epoch": 4.224145214043473, "total_flos": 4031022118034780160, "step": 1050000 }, { "loss": 2.83, "learning_rate": 1.563484756925997e-06, "epoch": 4.224547513587667, "total_flos": 4031401207949660160, "step": 1050100 }, { "loss": 2.795, "learning_rate": 1.5626736315300665e-06, "epoch": 4.224949813131862, "total_flos": 4031793459122810880, "step": 1050200 }, { "loss": 2.8225, "learning_rate": 1.561862506134136e-06, "epoch": 4.225352112676056, "total_flos": 4032184494021488640, "step": 1050300 }, { "loss": 2.8225, "learning_rate": 1.5610513807382053e-06, "epoch": 4.225754412220251, "total_flos": 4032584754547937280, "step": 1050400 }, { "loss": 2.78, "learning_rate": 1.5602402553422747e-06, "epoch": 4.226156711764445, "total_flos": 4032960089414553600, "step": 1050500 }, { "loss": 2.7775, "learning_rate": 1.5594291299463441e-06, "epoch": 4.22655901130864, "total_flos": 4033350247958261760, "step": 1050600 }, { "loss": 2.7525, "learning_rate": 1.5586180045504137e-06, "epoch": 4.226961310852835, "total_flos": 4033729752150036480, "step": 1050700 }, { "loss": 2.845, "learning_rate": 1.5578068791544831e-06, "epoch": 4.227363610397029, "total_flos": 4034111327726284800, "step": 1050800 }, { "loss": 2.83, "learning_rate": 1.5569957537585526e-06, "epoch": 4.227765909941224, "total_flos": 4034494942819553280, "step": 1050900 }, { "loss": 2.8225, "learning_rate": 1.556184628362622e-06, "epoch": 4.2281682094854185, "total_flos": 4034882212047482880, "step": 1051000 }, { "loss": 2.7725, "learning_rate": 1.5553735029666914e-06, "epoch": 4.2285705090296135, "total_flos": 4035266119259074560, "step": 1051100 }, { "loss": 2.815, "learning_rate": 1.5545623775707608e-06, "epoch": 4.228972808573808, "total_flos": 4035655390825328640, "step": 1051200 }, { "loss": 2.81, "learning_rate": 1.5537512521748302e-06, "epoch": 4.229375108118003, "total_flos": 4036048247480094720, "step": 1051300 }, { "loss": 2.7975, "learning_rate": 1.5529401267788996e-06, "epoch": 4.229777407662197, "total_flos": 4036412269400739840, "step": 1051400 }, { "loss": 2.7825, "learning_rate": 1.552129001382969e-06, "epoch": 4.230179707206392, "total_flos": 4036797270728232960, "step": 1051500 }, { "loss": 2.815, "learning_rate": 1.5513178759870384e-06, "epoch": 4.230582006750586, "total_flos": 4037173928094167040, "step": 1051600 }, { "loss": 2.8075, "learning_rate": 1.5505067505911078e-06, "epoch": 4.230984306294781, "total_flos": 4037546469247365120, "step": 1051700 }, { "loss": 2.7775, "learning_rate": 1.5496956251951772e-06, "epoch": 4.231386605838976, "total_flos": 4037933371999580160, "step": 1051800 }, { "loss": 2.8275, "learning_rate": 1.5488844997992466e-06, "epoch": 4.23178890538317, "total_flos": 4038322701989498880, "step": 1051900 }, { "loss": 2.8125, "learning_rate": 1.548073374403316e-06, "epoch": 4.232191204927365, "total_flos": 4038699072548352000, "step": 1052000 }, { "loss": 2.8, "learning_rate": 1.5472622490073854e-06, "epoch": 4.232593504471559, "total_flos": 4039072500679004160, "step": 1052100 }, { "loss": 2.765, "learning_rate": 1.5464511236114549e-06, "epoch": 4.232995804015754, "total_flos": 4039446236861706240, "step": 1052200 }, { "loss": 2.82, "learning_rate": 1.5456399982155243e-06, "epoch": 4.233398103559948, "total_flos": 4039829926312366080, "step": 1052300 }, { "loss": 2.8025, "learning_rate": 1.5448288728195937e-06, "epoch": 4.233800403104143, "total_flos": 4040234786374594560, "step": 1052400 }, { "loss": 2.7775, "learning_rate": 1.544017747423663e-06, "epoch": 4.2342027026483375, "total_flos": 4040609977837670400, "step": 1052500 }, { "loss": 2.835, "learning_rate": 1.5432066220277325e-06, "epoch": 4.2346050021925326, "total_flos": 4040991218805657600, "step": 1052600 }, { "loss": 2.765, "learning_rate": 1.5423954966318019e-06, "epoch": 4.235007301736728, "total_flos": 4041387373741854720, "step": 1052700 }, { "loss": 2.76, "learning_rate": 1.5415843712358713e-06, "epoch": 4.235409601280922, "total_flos": 4041769969076613120, "step": 1052800 }, { "loss": 2.76, "learning_rate": 1.5407732458399407e-06, "epoch": 4.235811900825117, "total_flos": 4042143360028569600, "step": 1052900 }, { "loss": 2.805, "learning_rate": 1.5399621204440101e-06, "epoch": 4.236214200369311, "total_flos": 4042531728683642880, "step": 1053000 }, { "loss": 2.7775, "learning_rate": 1.5391509950480795e-06, "epoch": 4.236616499913506, "total_flos": 4042924516292259840, "step": 1053100 }, { "loss": 2.8375, "learning_rate": 1.538339869652149e-06, "epoch": 4.2370187994577, "total_flos": 4043311732407767040, "step": 1053200 }, { "loss": 2.7875, "learning_rate": 1.5375287442562183e-06, "epoch": 4.237421099001895, "total_flos": 4043707600536883200, "step": 1053300 }, { "loss": 2.8125, "learning_rate": 1.5367176188602877e-06, "epoch": 4.237823398546089, "total_flos": 4044093318882078720, "step": 1053400 }, { "loss": 2.74, "learning_rate": 1.5359064934643572e-06, "epoch": 4.238225698090284, "total_flos": 4044481931854295040, "step": 1053500 }, { "loss": 2.815, "learning_rate": 1.5350953680684266e-06, "epoch": 4.238627997634478, "total_flos": 4044865063624519680, "step": 1053600 }, { "loss": 2.8175, "learning_rate": 1.534284242672496e-06, "epoch": 4.239030297178673, "total_flos": 4045247117212569600, "step": 1053700 }, { "loss": 2.7725, "learning_rate": 1.5334731172765654e-06, "epoch": 4.239432596722868, "total_flos": 4045623540883845120, "step": 1053800 }, { "loss": 2.84, "learning_rate": 1.5326619918806348e-06, "epoch": 4.2398348962670624, "total_flos": 4046019488681594880, "step": 1053900 }, { "loss": 2.8425, "learning_rate": 1.5318508664847042e-06, "epoch": 4.2402371958112575, "total_flos": 4046403109086105600, "step": 1054000 }, { "loss": 2.77, "learning_rate": 1.5310397410887736e-06, "epoch": 4.240639495355452, "total_flos": 4046796326905344000, "step": 1054100 }, { "loss": 2.7925, "learning_rate": 1.530228615692843e-06, "epoch": 4.241041794899647, "total_flos": 4047174285525626880, "step": 1054200 }, { "loss": 2.7625, "learning_rate": 1.5294174902969124e-06, "epoch": 4.241444094443841, "total_flos": 4047550390522368000, "step": 1054300 }, { "loss": 2.805, "learning_rate": 1.5286063649009818e-06, "epoch": 4.241846393988036, "total_flos": 4047909154313195520, "step": 1054400 }, { "loss": 2.79, "learning_rate": 1.5277952395050512e-06, "epoch": 4.24224869353223, "total_flos": 4048313005239398400, "step": 1054500 }, { "loss": 2.8, "learning_rate": 1.5269841141091206e-06, "epoch": 4.242650993076425, "total_flos": 4048715443375165440, "step": 1054600 }, { "loss": 2.8, "learning_rate": 1.52617298871319e-06, "epoch": 4.243053292620619, "total_flos": 4049088005773332480, "step": 1054700 }, { "loss": 2.8025, "learning_rate": 1.5253618633172594e-06, "epoch": 4.243455592164814, "total_flos": 4049456669719695360, "step": 1054800 }, { "loss": 2.81, "learning_rate": 1.5245507379213289e-06, "epoch": 4.243857891709009, "total_flos": 4049836492586004480, "step": 1054900 }, { "loss": 2.8325, "learning_rate": 1.5237396125253983e-06, "epoch": 4.244260191253203, "total_flos": 4050215842751754240, "step": 1055000 }, { "loss": 2.775, "learning_rate": 1.5229284871294679e-06, "epoch": 4.244662490797398, "total_flos": 4050606930762854400, "step": 1055100 }, { "loss": 2.8375, "learning_rate": 1.5221173617335373e-06, "epoch": 4.245064790341592, "total_flos": 4050994614267678720, "step": 1055200 }, { "loss": 2.7475, "learning_rate": 1.5213062363376067e-06, "epoch": 4.245467089885787, "total_flos": 4051368021153361920, "step": 1055300 }, { "loss": 2.82, "learning_rate": 1.5204951109416763e-06, "epoch": 4.2458693894299815, "total_flos": 4051755019507937280, "step": 1055400 }, { "loss": 2.805, "learning_rate": 1.5196839855457457e-06, "epoch": 4.2462716889741765, "total_flos": 4052135299141079040, "step": 1055500 }, { "loss": 2.8175, "learning_rate": 1.5188728601498151e-06, "epoch": 4.246673988518371, "total_flos": 4052530407762554880, "step": 1055600 }, { "loss": 2.805, "learning_rate": 1.5180617347538845e-06, "epoch": 4.247076288062566, "total_flos": 4052910427144826880, "step": 1055700 }, { "loss": 2.8075, "learning_rate": 1.517250609357954e-06, "epoch": 4.247478587606761, "total_flos": 4053292353263063040, "step": 1055800 }, { "loss": 2.78, "learning_rate": 1.5164394839620234e-06, "epoch": 4.247880887150955, "total_flos": 4053681401757143040, "step": 1055900 }, { "loss": 2.7625, "learning_rate": 1.5156283585660928e-06, "epoch": 4.24828318669515, "total_flos": 4054067481266810880, "step": 1056000 }, { "loss": 2.8225, "learning_rate": 1.5148172331701622e-06, "epoch": 4.248685486239344, "total_flos": 4054449487053680640, "step": 1056100 }, { "loss": 2.8125, "learning_rate": 1.5140061077742316e-06, "epoch": 4.249087785783539, "total_flos": 4054832762227445760, "step": 1056200 }, { "loss": 2.8175, "learning_rate": 1.513194982378301e-06, "epoch": 4.249490085327733, "total_flos": 4055218400904007680, "step": 1056300 }, { "loss": 2.82, "learning_rate": 1.5123838569823704e-06, "epoch": 4.249892384871928, "total_flos": 4055592057418076160, "step": 1056400 }, { "loss": 2.825, "learning_rate": 1.5115727315864398e-06, "epoch": 4.250294684416122, "total_flos": 4055982497457623040, "step": 1056500 }, { "loss": 2.7975, "learning_rate": 1.5107616061905092e-06, "epoch": 4.250696983960317, "total_flos": 4056359043287470080, "step": 1056600 }, { "loss": 2.8025, "learning_rate": 1.5099504807945786e-06, "epoch": 4.251099283504511, "total_flos": 4056753206507827200, "step": 1056700 }, { "loss": 2.79, "learning_rate": 1.509139355398648e-06, "epoch": 4.251501583048706, "total_flos": 4057138590244761600, "step": 1056800 }, { "loss": 2.785, "learning_rate": 1.5083282300027174e-06, "epoch": 4.251903882592901, "total_flos": 4057513590503116800, "step": 1056900 }, { "loss": 2.755, "learning_rate": 1.5075171046067868e-06, "epoch": 4.2523061821370955, "total_flos": 4057888505781596160, "step": 1057000 }, { "loss": 2.7775, "learning_rate": 1.5067059792108562e-06, "epoch": 4.252708481681291, "total_flos": 4058270086669086720, "step": 1057100 }, { "loss": 2.7925, "learning_rate": 1.5058948538149257e-06, "epoch": 4.253110781225485, "total_flos": 4058665492720128000, "step": 1057200 }, { "loss": 2.765, "learning_rate": 1.505083728418995e-06, "epoch": 4.25351308076968, "total_flos": 4059063910245519360, "step": 1057300 }, { "loss": 2.815, "learning_rate": 1.5042726030230645e-06, "epoch": 4.253915380313874, "total_flos": 4059446755208663040, "step": 1057400 }, { "loss": 2.7575, "learning_rate": 1.5034614776271339e-06, "epoch": 4.254317679858069, "total_flos": 4059836526031687680, "step": 1057500 }, { "loss": 2.795, "learning_rate": 1.5026503522312033e-06, "epoch": 4.254719979402263, "total_flos": 4060206560278548480, "step": 1057600 }, { "loss": 2.835, "learning_rate": 1.5018392268352727e-06, "epoch": 4.255122278946458, "total_flos": 4060589299016847360, "step": 1057700 }, { "loss": 2.7175, "learning_rate": 1.501028101439342e-06, "epoch": 4.255524578490652, "total_flos": 4060978814900244480, "step": 1057800 }, { "loss": 2.765, "learning_rate": 1.5002169760434115e-06, "epoch": 4.255926878034847, "total_flos": 4061360735707238400, "step": 1057900 }, { "loss": 2.835, "learning_rate": 1.499405850647481e-06, "epoch": 4.256329177579042, "total_flos": 4061751382885232640, "step": 1058000 }, { "loss": 2.7825, "learning_rate": 1.4985947252515503e-06, "epoch": 4.256731477123236, "total_flos": 4062138312193658880, "step": 1058100 }, { "loss": 2.8325, "learning_rate": 1.4977835998556197e-06, "epoch": 4.257133776667431, "total_flos": 4062512276759777280, "step": 1058200 }, { "loss": 2.7525, "learning_rate": 1.4969724744596891e-06, "epoch": 4.257536076211625, "total_flos": 4062896279573729280, "step": 1058300 }, { "loss": 2.7625, "learning_rate": 1.4961613490637585e-06, "epoch": 4.2579383757558205, "total_flos": 4063282778671534080, "step": 1058400 }, { "loss": 2.825, "learning_rate": 1.495350223667828e-06, "epoch": 4.258340675300015, "total_flos": 4063658166650572800, "step": 1058500 }, { "loss": 2.835, "learning_rate": 1.4945390982718974e-06, "epoch": 4.25874297484421, "total_flos": 4064042174775767040, "step": 1058600 }, { "loss": 2.8125, "learning_rate": 1.4937279728759668e-06, "epoch": 4.259145274388404, "total_flos": 4064417344993873920, "step": 1058700 }, { "loss": 2.8275, "learning_rate": 1.4929168474800362e-06, "epoch": 4.259547573932599, "total_flos": 4064826023839272960, "step": 1058800 }, { "loss": 2.8525, "learning_rate": 1.4921057220841056e-06, "epoch": 4.259949873476794, "total_flos": 4065211232305213440, "step": 1058900 }, { "loss": 2.8, "learning_rate": 1.491294596688175e-06, "epoch": 4.260352173020988, "total_flos": 4065600530427678720, "step": 1059000 }, { "loss": 2.7775, "learning_rate": 1.4904834712922444e-06, "epoch": 4.260754472565183, "total_flos": 4065997205865615360, "step": 1059100 }, { "loss": 2.8175, "learning_rate": 1.4896723458963138e-06, "epoch": 4.261156772109377, "total_flos": 4066377347406458880, "step": 1059200 }, { "loss": 2.83, "learning_rate": 1.4888612205003832e-06, "epoch": 4.261559071653572, "total_flos": 4066779238484275200, "step": 1059300 }, { "loss": 2.7725, "learning_rate": 1.4880500951044526e-06, "epoch": 4.261961371197766, "total_flos": 4067177937505505280, "step": 1059400 }, { "loss": 2.8275, "learning_rate": 1.487238969708522e-06, "epoch": 4.262363670741961, "total_flos": 4067542421504225280, "step": 1059500 }, { "loss": 2.785, "learning_rate": 1.4864278443125914e-06, "epoch": 4.262765970286155, "total_flos": 4067936467877253120, "step": 1059600 }, { "loss": 2.7925, "learning_rate": 1.4856167189166608e-06, "epoch": 4.26316826983035, "total_flos": 4068315355964928000, "step": 1059700 }, { "loss": 2.785, "learning_rate": 1.4848055935207303e-06, "epoch": 4.2635705693745445, "total_flos": 4068686638353715200, "step": 1059800 }, { "loss": 2.8325, "learning_rate": 1.4839944681247997e-06, "epoch": 4.2639728689187395, "total_flos": 4069064326100643840, "step": 1059900 }, { "loss": 2.78, "learning_rate": 1.483183342728869e-06, "epoch": 4.2643751684629345, "total_flos": 4069452099896586240, "step": 1060000 }, { "loss": 2.8275, "learning_rate": 1.482372217332939e-06, "epoch": 4.264777468007129, "total_flos": 4069822096964751360, "step": 1060100 }, { "loss": 2.79, "learning_rate": 1.4815610919370083e-06, "epoch": 4.265179767551324, "total_flos": 4070201255925780480, "step": 1060200 }, { "loss": 2.82, "learning_rate": 1.4807499665410777e-06, "epoch": 4.265582067095518, "total_flos": 4070566637524439040, "step": 1060300 }, { "loss": 2.8325, "learning_rate": 1.4799388411451471e-06, "epoch": 4.265984366639713, "total_flos": 4070963652881879040, "step": 1060400 }, { "loss": 2.77, "learning_rate": 1.4791277157492165e-06, "epoch": 4.266386666183907, "total_flos": 4071349530564341760, "step": 1060500 }, { "loss": 2.795, "learning_rate": 1.478316590353286e-06, "epoch": 4.266788965728102, "total_flos": 4071719373606481920, "step": 1060600 }, { "loss": 2.825, "learning_rate": 1.4775054649573553e-06, "epoch": 4.267191265272296, "total_flos": 4072118545328271360, "step": 1060700 }, { "loss": 2.7475, "learning_rate": 1.4766943395614248e-06, "epoch": 4.267593564816491, "total_flos": 4072505294054461440, "step": 1060800 }, { "loss": 2.76, "learning_rate": 1.4758832141654942e-06, "epoch": 4.267995864360685, "total_flos": 4072889227822264320, "step": 1060900 }, { "loss": 2.77, "learning_rate": 1.4750720887695636e-06, "epoch": 4.26839816390488, "total_flos": 4073270065135841280, "step": 1061000 }, { "loss": 2.73, "learning_rate": 1.474260963373633e-06, "epoch": 4.268800463449075, "total_flos": 4073655353270415360, "step": 1061100 }, { "loss": 2.7975, "learning_rate": 1.4734498379777024e-06, "epoch": 4.269202762993269, "total_flos": 4074042112619089920, "step": 1061200 }, { "loss": 2.775, "learning_rate": 1.4726387125817718e-06, "epoch": 4.269605062537464, "total_flos": 4074422572834467840, "step": 1061300 }, { "loss": 2.7675, "learning_rate": 1.4718275871858412e-06, "epoch": 4.2700073620816585, "total_flos": 4074803840358666240, "step": 1061400 }, { "loss": 2.785, "learning_rate": 1.4710164617899106e-06, "epoch": 4.270409661625854, "total_flos": 4075177767746088960, "step": 1061500 }, { "loss": 2.76, "learning_rate": 1.47020533639398e-06, "epoch": 4.270811961170048, "total_flos": 4075563980036812800, "step": 1061600 }, { "loss": 2.795, "learning_rate": 1.4693942109980494e-06, "epoch": 4.271214260714243, "total_flos": 4075961457472327680, "step": 1061700 }, { "loss": 2.8025, "learning_rate": 1.4685830856021188e-06, "epoch": 4.271616560258437, "total_flos": 4076350575012556800, "step": 1061800 }, { "loss": 2.8075, "learning_rate": 1.4677719602061882e-06, "epoch": 4.272018859802632, "total_flos": 4076740080273469440, "step": 1061900 }, { "loss": 2.805, "learning_rate": 1.4669608348102576e-06, "epoch": 4.272421159346827, "total_flos": 4077120009364623360, "step": 1062000 }, { "loss": 2.825, "learning_rate": 1.466149709414327e-06, "epoch": 4.272823458891021, "total_flos": 4077498977120931840, "step": 1062100 }, { "loss": 2.79, "learning_rate": 1.4653385840183965e-06, "epoch": 4.273225758435216, "total_flos": 4077879336422707200, "step": 1062200 }, { "loss": 2.7675, "learning_rate": 1.4645274586224659e-06, "epoch": 4.27362805797941, "total_flos": 4078264518332436480, "step": 1062300 }, { "loss": 2.825, "learning_rate": 1.4637163332265353e-06, "epoch": 4.274030357523605, "total_flos": 4078645153818808320, "step": 1062400 }, { "loss": 2.7775, "learning_rate": 1.4629052078306047e-06, "epoch": 4.274432657067799, "total_flos": 4079024689878036480, "step": 1062500 }, { "loss": 2.8525, "learning_rate": 1.462094082434674e-06, "epoch": 4.274834956611994, "total_flos": 4079409457510871040, "step": 1062600 }, { "loss": 2.7175, "learning_rate": 1.4612829570387435e-06, "epoch": 4.275237256156188, "total_flos": 4079798686587187200, "step": 1062700 }, { "loss": 2.7925, "learning_rate": 1.460471831642813e-06, "epoch": 4.2756395557003835, "total_flos": 4080185860212756480, "step": 1062800 }, { "loss": 2.8125, "learning_rate": 1.4596607062468823e-06, "epoch": 4.2760418552445785, "total_flos": 4080563962236579840, "step": 1062900 }, { "loss": 2.7375, "learning_rate": 1.4588495808509517e-06, "epoch": 4.276444154788773, "total_flos": 4080938027716300800, "step": 1063000 }, { "loss": 2.78, "learning_rate": 1.4580384554550211e-06, "epoch": 4.276846454332968, "total_flos": 4081325695287398400, "step": 1063100 }, { "loss": 2.8075, "learning_rate": 1.4572273300590905e-06, "epoch": 4.277248753877162, "total_flos": 4081715598891479040, "step": 1063200 }, { "loss": 2.765, "learning_rate": 1.45641620466316e-06, "epoch": 4.277651053421357, "total_flos": 4082106150467112960, "step": 1063300 }, { "loss": 2.795, "learning_rate": 1.4556050792672294e-06, "epoch": 4.278053352965551, "total_flos": 4082484342782054400, "step": 1063400 }, { "loss": 2.83, "learning_rate": 1.4547939538712988e-06, "epoch": 4.278455652509746, "total_flos": 4082876859517317120, "step": 1063500 }, { "loss": 2.765, "learning_rate": 1.4539828284753682e-06, "epoch": 4.27885795205394, "total_flos": 4083259699169218560, "step": 1063600 }, { "loss": 2.79, "learning_rate": 1.4531717030794376e-06, "epoch": 4.279260251598135, "total_flos": 4083646145154600960, "step": 1063700 }, { "loss": 2.7825, "learning_rate": 1.452360577683507e-06, "epoch": 4.279662551142329, "total_flos": 4084019190875811840, "step": 1063800 }, { "loss": 2.8175, "learning_rate": 1.4515494522875764e-06, "epoch": 4.280064850686524, "total_flos": 4084393628142489600, "step": 1063900 }, { "loss": 2.7525, "learning_rate": 1.4507383268916458e-06, "epoch": 4.280467150230718, "total_flos": 4084790080508252160, "step": 1064000 }, { "loss": 2.8225, "learning_rate": 1.4499272014957152e-06, "epoch": 4.280869449774913, "total_flos": 4085171969447792640, "step": 1064100 }, { "loss": 2.8075, "learning_rate": 1.4491160760997846e-06, "epoch": 4.281271749319108, "total_flos": 4085566770017218560, "step": 1064200 }, { "loss": 2.775, "learning_rate": 1.448304950703854e-06, "epoch": 4.2816740488633025, "total_flos": 4085965554018324480, "step": 1064300 }, { "loss": 2.775, "learning_rate": 1.4474938253079234e-06, "epoch": 4.2820763484074975, "total_flos": 4086367731903221760, "step": 1064400 }, { "loss": 2.825, "learning_rate": 1.4466826999119928e-06, "epoch": 4.282478647951692, "total_flos": 4086742928677539840, "step": 1064500 }, { "loss": 2.8, "learning_rate": 1.4458715745160622e-06, "epoch": 4.282880947495887, "total_flos": 4087113318777630720, "step": 1064600 }, { "loss": 2.7525, "learning_rate": 1.4450604491201317e-06, "epoch": 4.283283247040081, "total_flos": 4087492849525616640, "step": 1064700 }, { "loss": 2.745, "learning_rate": 1.444249323724201e-06, "epoch": 4.283685546584276, "total_flos": 4087872959199006720, "step": 1064800 }, { "loss": 2.78, "learning_rate": 1.4434381983282709e-06, "epoch": 4.28408784612847, "total_flos": 4088254715357491200, "step": 1064900 }, { "loss": 2.7675, "learning_rate": 1.4426270729323403e-06, "epoch": 4.284490145672665, "total_flos": 4088626433268142080, "step": 1065000 }, { "loss": 2.805, "learning_rate": 1.4418159475364097e-06, "epoch": 4.28489244521686, "total_flos": 4088995782364753920, "step": 1065100 }, { "loss": 2.795, "learning_rate": 1.4410048221404791e-06, "epoch": 4.285294744761054, "total_flos": 4089387874200637440, "step": 1065200 }, { "loss": 2.7975, "learning_rate": 1.4401936967445485e-06, "epoch": 4.285697044305249, "total_flos": 4089771303400427520, "step": 1065300 }, { "loss": 2.795, "learning_rate": 1.439382571348618e-06, "epoch": 4.286099343849443, "total_flos": 4090154408614440960, "step": 1065400 }, { "loss": 2.8075, "learning_rate": 1.4385714459526873e-06, "epoch": 4.286501643393638, "total_flos": 4090538623878082560, "step": 1065500 }, { "loss": 2.7825, "learning_rate": 1.4377603205567567e-06, "epoch": 4.286903942937832, "total_flos": 4090930736958935040, "step": 1065600 }, { "loss": 2.795, "learning_rate": 1.4369491951608261e-06, "epoch": 4.287306242482027, "total_flos": 4091303437449400320, "step": 1065700 }, { "loss": 2.8075, "learning_rate": 1.4361380697648956e-06, "epoch": 4.2877085420262215, "total_flos": 4091694923803668480, "step": 1065800 }, { "loss": 2.76, "learning_rate": 1.435326944368965e-06, "epoch": 4.2881108415704166, "total_flos": 4092073498528051200, "step": 1065900 }, { "loss": 2.805, "learning_rate": 1.4345158189730344e-06, "epoch": 4.288513141114612, "total_flos": 4092434880761303040, "step": 1066000 }, { "loss": 2.855, "learning_rate": 1.4337046935771038e-06, "epoch": 4.288915440658806, "total_flos": 4092805286795120640, "step": 1066100 }, { "loss": 2.775, "learning_rate": 1.4328935681811732e-06, "epoch": 4.289317740203001, "total_flos": 4093178226291486720, "step": 1066200 }, { "loss": 2.755, "learning_rate": 1.4320824427852426e-06, "epoch": 4.289720039747195, "total_flos": 4093558654639411200, "step": 1066300 }, { "loss": 2.83, "learning_rate": 1.431271317389312e-06, "epoch": 4.29012233929139, "total_flos": 4093944017131376640, "step": 1066400 }, { "loss": 2.87, "learning_rate": 1.4304601919933814e-06, "epoch": 4.290524638835584, "total_flos": 4094331031419678720, "step": 1066500 }, { "loss": 2.7725, "learning_rate": 1.4296490665974508e-06, "epoch": 4.290926938379779, "total_flos": 4094707354177351680, "step": 1066600 }, { "loss": 2.745, "learning_rate": 1.4288379412015202e-06, "epoch": 4.291329237923973, "total_flos": 4095094926146088960, "step": 1066700 }, { "loss": 2.78, "learning_rate": 1.4280268158055896e-06, "epoch": 4.291731537468168, "total_flos": 4095481016278241280, "step": 1066800 }, { "loss": 2.7425, "learning_rate": 1.427215690409659e-06, "epoch": 4.292133837012362, "total_flos": 4095863452275732480, "step": 1066900 }, { "loss": 2.7675, "learning_rate": 1.4264045650137284e-06, "epoch": 4.292536136556557, "total_flos": 4096251061423165440, "step": 1067000 }, { "loss": 2.8075, "learning_rate": 1.4255934396177979e-06, "epoch": 4.292938436100751, "total_flos": 4096629073155870720, "step": 1067100 }, { "loss": 2.815, "learning_rate": 1.4247823142218673e-06, "epoch": 4.2933407356449464, "total_flos": 4097019534440386560, "step": 1067200 }, { "loss": 2.755, "learning_rate": 1.4239711888259367e-06, "epoch": 4.2937430351891415, "total_flos": 4097417102167019520, "step": 1067300 }, { "loss": 2.83, "learning_rate": 1.423160063430006e-06, "epoch": 4.294145334733336, "total_flos": 4097818998556078080, "step": 1067400 }, { "loss": 2.78, "learning_rate": 1.4223489380340755e-06, "epoch": 4.294547634277531, "total_flos": 4098213342358671360, "step": 1067500 }, { "loss": 2.7525, "learning_rate": 1.421537812638145e-06, "epoch": 4.294949933821725, "total_flos": 4098597770072002560, "step": 1067600 }, { "loss": 2.8075, "learning_rate": 1.4207266872422143e-06, "epoch": 4.29535223336592, "total_flos": 4098992958362112000, "step": 1067700 }, { "loss": 2.74, "learning_rate": 1.4199155618462837e-06, "epoch": 4.295754532910114, "total_flos": 4099392507182100480, "step": 1067800 }, { "loss": 2.77, "learning_rate": 1.4191044364503531e-06, "epoch": 4.296156832454309, "total_flos": 4099782856930529280, "step": 1067900 }, { "loss": 2.8, "learning_rate": 1.4182933110544225e-06, "epoch": 4.296559131998503, "total_flos": 4100159949818327040, "step": 1068000 }, { "loss": 2.7575, "learning_rate": 1.417482185658492e-06, "epoch": 4.296961431542698, "total_flos": 4100536150417428480, "step": 1068100 }, { "loss": 2.8175, "learning_rate": 1.4166710602625613e-06, "epoch": 4.297363731086893, "total_flos": 4100918921023180800, "step": 1068200 }, { "loss": 2.815, "learning_rate": 1.4158599348666307e-06, "epoch": 4.297766030631087, "total_flos": 4101301776608808960, "step": 1068300 }, { "loss": 2.8125, "learning_rate": 1.4150488094707002e-06, "epoch": 4.298168330175282, "total_flos": 4101691143777423360, "step": 1068400 }, { "loss": 2.8325, "learning_rate": 1.4142376840747696e-06, "epoch": 4.298570629719476, "total_flos": 4102068103884165120, "step": 1068500 }, { "loss": 2.815, "learning_rate": 1.413426558678839e-06, "epoch": 4.298972929263671, "total_flos": 4102456276023275520, "step": 1068600 }, { "loss": 2.8125, "learning_rate": 1.4126154332829084e-06, "epoch": 4.2993752288078655, "total_flos": 4102844915551703040, "step": 1068700 }, { "loss": 2.78, "learning_rate": 1.4118043078869778e-06, "epoch": 4.2997775283520605, "total_flos": 4103225248297267200, "step": 1068800 }, { "loss": 2.7375, "learning_rate": 1.4109931824910472e-06, "epoch": 4.300179827896255, "total_flos": 4103594576148910080, "step": 1068900 }, { "loss": 2.7975, "learning_rate": 1.4101820570951166e-06, "epoch": 4.30058212744045, "total_flos": 4103971998333726720, "step": 1069000 }, { "loss": 2.8725, "learning_rate": 1.409370931699186e-06, "epoch": 4.300984426984645, "total_flos": 4104348692878356480, "step": 1069100 }, { "loss": 2.8125, "learning_rate": 1.4085598063032554e-06, "epoch": 4.301386726528839, "total_flos": 4104742064723619840, "step": 1069200 }, { "loss": 2.8125, "learning_rate": 1.4077486809073248e-06, "epoch": 4.301789026073034, "total_flos": 4105117596106199040, "step": 1069300 }, { "loss": 2.76, "learning_rate": 1.4069375555113942e-06, "epoch": 4.302191325617228, "total_flos": 4105494354385735680, "step": 1069400 }, { "loss": 2.7725, "learning_rate": 1.4061264301154636e-06, "epoch": 4.302593625161423, "total_flos": 4105881591746211840, "step": 1069500 }, { "loss": 2.805, "learning_rate": 1.4053153047195335e-06, "epoch": 4.302995924705617, "total_flos": 4106251774707855360, "step": 1069600 }, { "loss": 2.79, "learning_rate": 1.4045041793236029e-06, "epoch": 4.303398224249812, "total_flos": 4106622796845772800, "step": 1069700 }, { "loss": 2.74, "learning_rate": 1.4036930539276723e-06, "epoch": 4.303800523794006, "total_flos": 4106999879111086080, "step": 1069800 }, { "loss": 2.7375, "learning_rate": 1.4028819285317417e-06, "epoch": 4.304202823338201, "total_flos": 4107392427713802240, "step": 1069900 }, { "loss": 2.79, "learning_rate": 1.402070803135811e-06, "epoch": 4.304605122882395, "total_flos": 4107771283934023680, "step": 1070000 }, { "loss": 2.73, "learning_rate": 1.4012596777398805e-06, "epoch": 4.30500742242659, "total_flos": 4108177716123955200, "step": 1070100 }, { "loss": 2.8325, "learning_rate": 1.40044855234395e-06, "epoch": 4.305409721970785, "total_flos": 4108561230303621120, "step": 1070200 }, { "loss": 2.8225, "learning_rate": 1.3996374269480193e-06, "epoch": 4.3058120215149795, "total_flos": 4108942078239682560, "step": 1070300 }, { "loss": 2.8, "learning_rate": 1.3988263015520887e-06, "epoch": 4.306214321059175, "total_flos": 4109313068510146560, "step": 1070400 }, { "loss": 2.7975, "learning_rate": 1.3980151761561581e-06, "epoch": 4.306616620603369, "total_flos": 4109686841871544320, "step": 1070500 }, { "loss": 2.815, "learning_rate": 1.3972040507602275e-06, "epoch": 4.307018920147564, "total_flos": 4110048224104796160, "step": 1070600 }, { "loss": 2.7825, "learning_rate": 1.396392925364297e-06, "epoch": 4.307421219691758, "total_flos": 4110439338672107520, "step": 1070700 }, { "loss": 2.755, "learning_rate": 1.3955817999683664e-06, "epoch": 4.307823519235953, "total_flos": 4110817488497111040, "step": 1070800 }, { "loss": 2.8225, "learning_rate": 1.3947706745724358e-06, "epoch": 4.308225818780147, "total_flos": 4111196360651059200, "step": 1070900 }, { "loss": 2.7925, "learning_rate": 1.3939595491765052e-06, "epoch": 4.308628118324342, "total_flos": 4111570505799413760, "step": 1071000 }, { "loss": 2.795, "learning_rate": 1.3931484237805746e-06, "epoch": 4.309030417868536, "total_flos": 4111951258133114880, "step": 1071100 }, { "loss": 2.7725, "learning_rate": 1.392337298384644e-06, "epoch": 4.309432717412731, "total_flos": 4112320511627366400, "step": 1071200 }, { "loss": 2.86, "learning_rate": 1.3915261729887134e-06, "epoch": 4.309835016956926, "total_flos": 4112699702455848960, "step": 1071300 }, { "loss": 2.7625, "learning_rate": 1.3907150475927828e-06, "epoch": 4.31023731650112, "total_flos": 4113078861416878080, "step": 1071400 }, { "loss": 2.8225, "learning_rate": 1.3899039221968522e-06, "epoch": 4.310639616045315, "total_flos": 4113475494364876800, "step": 1071500 }, { "loss": 2.82, "learning_rate": 1.3890927968009216e-06, "epoch": 4.311041915589509, "total_flos": 4113866986030387200, "step": 1071600 }, { "loss": 2.78, "learning_rate": 1.388281671404991e-06, "epoch": 4.3114442151337045, "total_flos": 4114249804437319680, "step": 1071700 }, { "loss": 2.7575, "learning_rate": 1.3874705460090604e-06, "epoch": 4.311846514677899, "total_flos": 4114631443748474880, "step": 1071800 }, { "loss": 2.76, "learning_rate": 1.3866594206131298e-06, "epoch": 4.312248814222094, "total_flos": 4115009163362856960, "step": 1071900 }, { "loss": 2.775, "learning_rate": 1.3858482952171993e-06, "epoch": 4.312651113766288, "total_flos": 4115410613607567360, "step": 1072000 }, { "loss": 2.745, "learning_rate": 1.3850371698212687e-06, "epoch": 4.313053413310483, "total_flos": 4115790229335429120, "step": 1072100 }, { "loss": 2.77, "learning_rate": 1.384226044425338e-06, "epoch": 4.313455712854678, "total_flos": 4116187388096409600, "step": 1072200 }, { "loss": 2.825, "learning_rate": 1.3834149190294075e-06, "epoch": 4.313858012398872, "total_flos": 4116571029745889280, "step": 1072300 }, { "loss": 2.83, "learning_rate": 1.3826037936334769e-06, "epoch": 4.314260311943067, "total_flos": 4116950682652446720, "step": 1072400 }, { "loss": 2.77, "learning_rate": 1.3817926682375463e-06, "epoch": 4.314662611487261, "total_flos": 4117334526129131520, "step": 1072500 }, { "loss": 2.8425, "learning_rate": 1.3809815428416157e-06, "epoch": 4.315064911031456, "total_flos": 4117708464139038720, "step": 1072600 }, { "loss": 2.8125, "learning_rate": 1.3801704174456851e-06, "epoch": 4.31546721057565, "total_flos": 4118095324401315840, "step": 1072700 }, { "loss": 2.725, "learning_rate": 1.3793592920497545e-06, "epoch": 4.315869510119845, "total_flos": 4118481302997381120, "step": 1072800 }, { "loss": 2.7575, "learning_rate": 1.378548166653824e-06, "epoch": 4.316271809664039, "total_flos": 4118874095917240320, "step": 1072900 }, { "loss": 2.8375, "learning_rate": 1.3777370412578933e-06, "epoch": 4.316674109208234, "total_flos": 4119255156302991360, "step": 1073000 }, { "loss": 2.7775, "learning_rate": 1.3769259158619627e-06, "epoch": 4.3170764087524285, "total_flos": 4119626120017244160, "step": 1073100 }, { "loss": 2.775, "learning_rate": 1.3761147904660321e-06, "epoch": 4.3174787082966235, "total_flos": 4120021042745241600, "step": 1073200 }, { "loss": 2.8325, "learning_rate": 1.3753036650701016e-06, "epoch": 4.3178810078408185, "total_flos": 4120394688636825600, "step": 1073300 }, { "loss": 2.775, "learning_rate": 1.374492539674171e-06, "epoch": 4.318283307385013, "total_flos": 4120764170514493440, "step": 1073400 }, { "loss": 2.7575, "learning_rate": 1.3736814142782404e-06, "epoch": 4.318685606929208, "total_flos": 4121139600983470080, "step": 1073500 }, { "loss": 2.8075, "learning_rate": 1.3728702888823098e-06, "epoch": 4.319087906473402, "total_flos": 4121497424684421120, "step": 1073600 }, { "loss": 2.8225, "learning_rate": 1.3720591634863792e-06, "epoch": 4.319490206017597, "total_flos": 4121858238614753280, "step": 1073700 }, { "loss": 2.8025, "learning_rate": 1.3712480380904486e-06, "epoch": 4.319892505561791, "total_flos": 4122250882819829760, "step": 1073800 }, { "loss": 2.8375, "learning_rate": 1.370436912694518e-06, "epoch": 4.320294805105986, "total_flos": 4122637010130677760, "step": 1073900 }, { "loss": 2.7175, "learning_rate": 1.3696257872985874e-06, "epoch": 4.32069710465018, "total_flos": 4123016763950837760, "step": 1074000 }, { "loss": 2.8, "learning_rate": 1.3688146619026568e-06, "epoch": 4.321099404194375, "total_flos": 4123427280486051840, "step": 1074100 }, { "loss": 2.8125, "learning_rate": 1.3680035365067262e-06, "epoch": 4.321501703738569, "total_flos": 4123802232943226880, "step": 1074200 }, { "loss": 2.7575, "learning_rate": 1.3671924111107956e-06, "epoch": 4.321904003282764, "total_flos": 4124184530848419840, "step": 1074300 }, { "loss": 2.815, "learning_rate": 1.3663812857148652e-06, "epoch": 4.322306302826959, "total_flos": 4124584456766607360, "step": 1074400 }, { "loss": 2.795, "learning_rate": 1.3655701603189347e-06, "epoch": 4.322708602371153, "total_flos": 4124966398818570240, "step": 1074500 }, { "loss": 2.8275, "learning_rate": 1.364759034923004e-06, "epoch": 4.323110901915348, "total_flos": 4125361337480294400, "step": 1074600 }, { "loss": 2.755, "learning_rate": 1.3639479095270735e-06, "epoch": 4.3235132014595425, "total_flos": 4125735790680698880, "step": 1074700 }, { "loss": 2.7575, "learning_rate": 1.3631367841311429e-06, "epoch": 4.323915501003738, "total_flos": 4126111816008806400, "step": 1074800 }, { "loss": 2.79, "learning_rate": 1.3623256587352123e-06, "epoch": 4.324317800547932, "total_flos": 4126487432371261440, "step": 1074900 }, { "loss": 2.77, "learning_rate": 1.3615145333392817e-06, "epoch": 4.324720100092127, "total_flos": 4126866012406886400, "step": 1075000 }, { "loss": 2.825, "learning_rate": 1.360703407943351e-06, "epoch": 4.325122399636321, "total_flos": 4127257185397862400, "step": 1075100 }, { "loss": 2.8075, "learning_rate": 1.3598922825474207e-06, "epoch": 4.325524699180516, "total_flos": 4127639817911316480, "step": 1075200 }, { "loss": 2.78, "learning_rate": 1.3590811571514901e-06, "epoch": 4.325926998724711, "total_flos": 4128046260723732480, "step": 1075300 }, { "loss": 2.7875, "learning_rate": 1.3582700317555595e-06, "epoch": 4.326329298268905, "total_flos": 4128435149880545280, "step": 1075400 }, { "loss": 2.78, "learning_rate": 1.357458906359629e-06, "epoch": 4.3267315978131, "total_flos": 4128818239160832000, "step": 1075500 }, { "loss": 2.7825, "learning_rate": 1.3566477809636984e-06, "epoch": 4.327133897357294, "total_flos": 4129206209472737280, "step": 1075600 }, { "loss": 2.7575, "learning_rate": 1.3558366555677678e-06, "epoch": 4.327536196901489, "total_flos": 4129582394138112000, "step": 1075700 }, { "loss": 2.775, "learning_rate": 1.3550255301718372e-06, "epoch": 4.327938496445683, "total_flos": 4129966338528399360, "step": 1075800 }, { "loss": 2.77, "learning_rate": 1.3542144047759066e-06, "epoch": 4.328340795989878, "total_flos": 4130355636650864640, "step": 1075900 }, { "loss": 2.735, "learning_rate": 1.353403279379976e-06, "epoch": 4.328743095534072, "total_flos": 4130740260880158720, "step": 1076000 }, { "loss": 2.7775, "learning_rate": 1.3525921539840454e-06, "epoch": 4.3291453950782675, "total_flos": 4131141881084620800, "step": 1076100 }, { "loss": 2.8275, "learning_rate": 1.3517810285881148e-06, "epoch": 4.329547694622462, "total_flos": 4131544871589580800, "step": 1076200 }, { "loss": 2.79, "learning_rate": 1.3509699031921842e-06, "epoch": 4.329949994166657, "total_flos": 4131926117868810240, "step": 1076300 }, { "loss": 2.795, "learning_rate": 1.3501587777962536e-06, "epoch": 4.330352293710852, "total_flos": 4132299200768716800, "step": 1076400 }, { "loss": 2.79, "learning_rate": 1.349347652400323e-06, "epoch": 4.330754593255046, "total_flos": 4132685397125713920, "step": 1076500 }, { "loss": 2.7875, "learning_rate": 1.3485365270043924e-06, "epoch": 4.331156892799241, "total_flos": 4133071242940723200, "step": 1076600 }, { "loss": 2.7575, "learning_rate": 1.3477254016084618e-06, "epoch": 4.331559192343435, "total_flos": 4133459739065610240, "step": 1076700 }, { "loss": 2.7675, "learning_rate": 1.3469142762125312e-06, "epoch": 4.33196149188763, "total_flos": 4133827691305512960, "step": 1076800 }, { "loss": 2.7975, "learning_rate": 1.3461031508166006e-06, "epoch": 4.332363791431824, "total_flos": 4134215900623319040, "step": 1076900 }, { "loss": 2.85, "learning_rate": 1.34529202542067e-06, "epoch": 4.332766090976019, "total_flos": 4134605071275970560, "step": 1077000 }, { "loss": 2.795, "learning_rate": 1.3444809000247395e-06, "epoch": 4.333168390520213, "total_flos": 4135003849965834240, "step": 1077100 }, { "loss": 2.8075, "learning_rate": 1.3436697746288089e-06, "epoch": 4.333570690064408, "total_flos": 4135375870617292800, "step": 1077200 }, { "loss": 2.765, "learning_rate": 1.3428586492328783e-06, "epoch": 4.333972989608602, "total_flos": 4135760638250127360, "step": 1077300 }, { "loss": 2.755, "learning_rate": 1.3420475238369477e-06, "epoch": 4.334375289152797, "total_flos": 4136119784450396160, "step": 1077400 }, { "loss": 2.78, "learning_rate": 1.341236398441017e-06, "epoch": 4.334777588696992, "total_flos": 4136500196864593920, "step": 1077500 }, { "loss": 2.7825, "learning_rate": 1.3404252730450865e-06, "epoch": 4.3351798882411865, "total_flos": 4136890042045009920, "step": 1077600 }, { "loss": 2.815, "learning_rate": 1.339614147649156e-06, "epoch": 4.3355821877853815, "total_flos": 4137266842814484480, "step": 1077700 }, { "loss": 2.82, "learning_rate": 1.3388030222532253e-06, "epoch": 4.335984487329576, "total_flos": 4137650548198871040, "step": 1077800 }, { "loss": 2.8275, "learning_rate": 1.3379918968572947e-06, "epoch": 4.336386786873771, "total_flos": 4138043171158978560, "step": 1077900 }, { "loss": 2.82, "learning_rate": 1.3371807714613641e-06, "epoch": 4.336789086417965, "total_flos": 4138433951118028800, "step": 1078000 }, { "loss": 2.8475, "learning_rate": 1.3363696460654335e-06, "epoch": 4.33719138596216, "total_flos": 4138825325936209920, "step": 1078100 }, { "loss": 2.81, "learning_rate": 1.335558520669503e-06, "epoch": 4.337593685506354, "total_flos": 4139206800598855680, "step": 1078200 }, { "loss": 2.7925, "learning_rate": 1.3347473952735724e-06, "epoch": 4.337995985050549, "total_flos": 4139584498968268800, "step": 1078300 }, { "loss": 2.745, "learning_rate": 1.3339362698776418e-06, "epoch": 4.338398284594744, "total_flos": 4139991483527393280, "step": 1078400 }, { "loss": 2.805, "learning_rate": 1.3331251444817112e-06, "epoch": 4.338800584138938, "total_flos": 4140375072064450560, "step": 1078500 }, { "loss": 2.795, "learning_rate": 1.3323140190857806e-06, "epoch": 4.339202883683133, "total_flos": 4140741558401495040, "step": 1078600 }, { "loss": 2.8025, "learning_rate": 1.33150289368985e-06, "epoch": 4.339605183227327, "total_flos": 4141122581608550400, "step": 1078700 }, { "loss": 2.8175, "learning_rate": 1.3306917682939194e-06, "epoch": 4.340007482771522, "total_flos": 4141523458239098880, "step": 1078800 }, { "loss": 2.775, "learning_rate": 1.3298806428979888e-06, "epoch": 4.340409782315716, "total_flos": 4141904449578700800, "step": 1078900 }, { "loss": 2.7925, "learning_rate": 1.3290695175020582e-06, "epoch": 4.340812081859911, "total_flos": 4142292233997127680, "step": 1079000 }, { "loss": 2.735, "learning_rate": 1.3282583921061278e-06, "epoch": 4.3412143814041055, "total_flos": 4142686482197360640, "step": 1079100 }, { "loss": 2.78, "learning_rate": 1.3274472667101972e-06, "epoch": 4.3416166809483006, "total_flos": 4143067680675409920, "step": 1079200 }, { "loss": 2.835, "learning_rate": 1.3266361413142666e-06, "epoch": 4.342018980492496, "total_flos": 4143452995366195200, "step": 1079300 }, { "loss": 2.7575, "learning_rate": 1.325825015918336e-06, "epoch": 4.34242128003669, "total_flos": 4143826763416350720, "step": 1079400 }, { "loss": 2.855, "learning_rate": 1.3250138905224055e-06, "epoch": 4.342823579580885, "total_flos": 4144206873089740800, "step": 1079500 }, { "loss": 2.7975, "learning_rate": 1.3242027651264749e-06, "epoch": 4.343225879125079, "total_flos": 4144596516442951680, "step": 1079600 }, { "loss": 2.79, "learning_rate": 1.3233916397305443e-06, "epoch": 4.343628178669274, "total_flos": 4144981889557401600, "step": 1079700 }, { "loss": 2.785, "learning_rate": 1.3225805143346137e-06, "epoch": 4.344030478213468, "total_flos": 4145361298146816000, "step": 1079800 }, { "loss": 2.7875, "learning_rate": 1.321769388938683e-06, "epoch": 4.344432777757663, "total_flos": 4145760692940779520, "step": 1079900 }, { "loss": 2.8075, "learning_rate": 1.3209582635427525e-06, "epoch": 4.344835077301857, "total_flos": 4146144716999700480, "step": 1080000 }, { "loss": 2.7975, "learning_rate": 1.320147138146822e-06, "epoch": 4.345237376846052, "total_flos": 4146526945858744320, "step": 1080100 }, { "loss": 2.82, "learning_rate": 1.3193360127508913e-06, "epoch": 4.345639676390246, "total_flos": 4146900873246167040, "step": 1080200 }, { "loss": 2.835, "learning_rate": 1.3185248873549607e-06, "epoch": 4.346041975934441, "total_flos": 4147293448405094400, "step": 1080300 }, { "loss": 2.7825, "learning_rate": 1.3177137619590301e-06, "epoch": 4.346444275478635, "total_flos": 4147659084943380480, "step": 1080400 }, { "loss": 2.8125, "learning_rate": 1.3169026365630995e-06, "epoch": 4.3468465750228305, "total_flos": 4148048579581808640, "step": 1080500 }, { "loss": 2.8225, "learning_rate": 1.316091511167169e-06, "epoch": 4.3472488745670255, "total_flos": 4148430898731970560, "step": 1080600 }, { "loss": 2.78, "learning_rate": 1.3152803857712384e-06, "epoch": 4.34765117411122, "total_flos": 4148822868409282560, "step": 1080700 }, { "loss": 2.8275, "learning_rate": 1.314469260375308e-06, "epoch": 4.348053473655415, "total_flos": 4149185599698063360, "step": 1080800 }, { "loss": 2.8175, "learning_rate": 1.3136581349793774e-06, "epoch": 4.348455773199609, "total_flos": 4149562634162196480, "step": 1080900 }, { "loss": 2.8275, "learning_rate": 1.3128470095834468e-06, "epoch": 4.348858072743804, "total_flos": 4149949505046958080, "step": 1081000 }, { "loss": 2.7925, "learning_rate": 1.3120358841875162e-06, "epoch": 4.349260372287998, "total_flos": 4150333529105879040, "step": 1081100 }, { "loss": 2.81, "learning_rate": 1.3112247587915856e-06, "epoch": 4.349662671832193, "total_flos": 4150708104464855040, "step": 1081200 }, { "loss": 2.795, "learning_rate": 1.310413633395655e-06, "epoch": 4.350064971376387, "total_flos": 4151092882720174080, "step": 1081300 }, { "loss": 2.79, "learning_rate": 1.3096025079997244e-06, "epoch": 4.350467270920582, "total_flos": 4151473491650334720, "step": 1081400 }, { "loss": 2.825, "learning_rate": 1.3087913826037938e-06, "epoch": 4.350869570464777, "total_flos": 4151852215089500160, "step": 1081500 }, { "loss": 2.81, "learning_rate": 1.3079802572078632e-06, "epoch": 4.351271870008971, "total_flos": 4152245544444825600, "step": 1081600 }, { "loss": 2.81, "learning_rate": 1.3071691318119326e-06, "epoch": 4.351674169553166, "total_flos": 4152645911196119040, "step": 1081700 }, { "loss": 2.82, "learning_rate": 1.306358006416002e-06, "epoch": 4.35207646909736, "total_flos": 4153018006204968960, "step": 1081800 }, { "loss": 2.7725, "learning_rate": 1.3055468810200715e-06, "epoch": 4.352478768641555, "total_flos": 4153403347451965440, "step": 1081900 }, { "loss": 2.785, "learning_rate": 1.3047357556241409e-06, "epoch": 4.3528810681857495, "total_flos": 4153789182644490240, "step": 1082000 }, { "loss": 2.77, "learning_rate": 1.3039246302282103e-06, "epoch": 4.3532833677299445, "total_flos": 4154157867835822080, "step": 1082100 }, { "loss": 2.7675, "learning_rate": 1.3031135048322797e-06, "epoch": 4.353685667274139, "total_flos": 4154528061419950080, "step": 1082200 }, { "loss": 2.825, "learning_rate": 1.302302379436349e-06, "epoch": 4.354087966818334, "total_flos": 4154913174283530240, "step": 1082300 }, { "loss": 2.765, "learning_rate": 1.3014912540404185e-06, "epoch": 4.354490266362529, "total_flos": 4155309515113205760, "step": 1082400 }, { "loss": 2.8475, "learning_rate": 1.300680128644488e-06, "epoch": 4.354892565906723, "total_flos": 4155710466101145600, "step": 1082500 }, { "loss": 2.7675, "learning_rate": 1.2998690032485573e-06, "epoch": 4.355294865450918, "total_flos": 4156109945874984960, "step": 1082600 }, { "loss": 2.7475, "learning_rate": 1.2990578778526267e-06, "epoch": 4.355697164995112, "total_flos": 4156488600268001280, "step": 1082700 }, { "loss": 2.8175, "learning_rate": 1.2982467524566961e-06, "epoch": 4.356099464539307, "total_flos": 4156879348359598080, "step": 1082800 }, { "loss": 2.7625, "learning_rate": 1.2974356270607655e-06, "epoch": 4.356501764083501, "total_flos": 4157265050771066880, "step": 1082900 }, { "loss": 2.785, "learning_rate": 1.296624501664835e-06, "epoch": 4.356904063627696, "total_flos": 4157647938224148480, "step": 1083000 }, { "loss": 2.7775, "learning_rate": 1.2958133762689043e-06, "epoch": 4.35730636317189, "total_flos": 4158014658255851520, "step": 1083100 }, { "loss": 2.845, "learning_rate": 1.2950022508729738e-06, "epoch": 4.357708662716085, "total_flos": 4158395686774149120, "step": 1083200 }, { "loss": 2.7925, "learning_rate": 1.2941911254770432e-06, "epoch": 4.358110962260279, "total_flos": 4158777357952757760, "step": 1083300 }, { "loss": 2.7925, "learning_rate": 1.2933800000811126e-06, "epoch": 4.358513261804474, "total_flos": 4159152310409932800, "step": 1083400 }, { "loss": 2.775, "learning_rate": 1.292568874685182e-06, "epoch": 4.3589155613486685, "total_flos": 4159558285833031680, "step": 1083500 }, { "loss": 2.83, "learning_rate": 1.2917577492892514e-06, "epoch": 4.3593178608928635, "total_flos": 4159941401669529600, "step": 1083600 }, { "loss": 2.7325, "learning_rate": 1.2909466238933208e-06, "epoch": 4.359720160437059, "total_flos": 4160314638595461120, "step": 1083700 }, { "loss": 2.805, "learning_rate": 1.2901354984973904e-06, "epoch": 4.360122459981253, "total_flos": 4160693287677235200, "step": 1083800 }, { "loss": 2.835, "learning_rate": 1.2893243731014598e-06, "epoch": 4.360524759525448, "total_flos": 4161075240351682560, "step": 1083900 }, { "loss": 2.825, "learning_rate": 1.2885132477055292e-06, "epoch": 4.360927059069642, "total_flos": 4161452651914014720, "step": 1084000 }, { "loss": 2.76, "learning_rate": 1.2877021223095986e-06, "epoch": 4.361329358613837, "total_flos": 4161845466078842880, "step": 1084100 }, { "loss": 2.8525, "learning_rate": 1.286890996913668e-06, "epoch": 4.361731658158031, "total_flos": 4162219159771607040, "step": 1084200 }, { "loss": 2.7725, "learning_rate": 1.2860798715177374e-06, "epoch": 4.362133957702226, "total_flos": 4162609514831278080, "step": 1084300 }, { "loss": 2.7275, "learning_rate": 1.2852687461218069e-06, "epoch": 4.36253625724642, "total_flos": 4162993793829826560, "step": 1084400 }, { "loss": 2.8175, "learning_rate": 1.2844576207258763e-06, "epoch": 4.362938556790615, "total_flos": 4163367476900106240, "step": 1084500 }, { "loss": 2.79, "learning_rate": 1.2836464953299457e-06, "epoch": 4.36334085633481, "total_flos": 4163756626307788800, "step": 1084600 }, { "loss": 2.79, "learning_rate": 1.282835369934015e-06, "epoch": 4.363743155879004, "total_flos": 4164141404563107840, "step": 1084700 }, { "loss": 2.83, "learning_rate": 1.2820242445380845e-06, "epoch": 4.364145455423199, "total_flos": 4164531063850045440, "step": 1084800 }, { "loss": 2.7975, "learning_rate": 1.281213119142154e-06, "epoch": 4.3645477549673934, "total_flos": 4164903785585479680, "step": 1084900 }, { "loss": 2.7725, "learning_rate": 1.2804019937462233e-06, "epoch": 4.3649500545115885, "total_flos": 4165280575732469760, "step": 1085000 }, { "loss": 2.8475, "learning_rate": 1.2795908683502927e-06, "epoch": 4.365352354055783, "total_flos": 4165667032340336640, "step": 1085100 }, { "loss": 2.78, "learning_rate": 1.2787797429543621e-06, "epoch": 4.365754653599978, "total_flos": 4166050073819443200, "step": 1085200 }, { "loss": 2.7825, "learning_rate": 1.2779686175584315e-06, "epoch": 4.366156953144172, "total_flos": 4166432732889108480, "step": 1085300 }, { "loss": 2.795, "learning_rate": 1.277157492162501e-06, "epoch": 4.366559252688367, "total_flos": 4166805592716840960, "step": 1085400 }, { "loss": 2.815, "learning_rate": 1.2763463667665703e-06, "epoch": 4.366961552232562, "total_flos": 4167178925245132800, "step": 1085500 }, { "loss": 2.7575, "learning_rate": 1.2755352413706397e-06, "epoch": 4.367363851776756, "total_flos": 4167569184702443520, "step": 1085600 }, { "loss": 2.7825, "learning_rate": 1.2747241159747092e-06, "epoch": 4.367766151320951, "total_flos": 4167939282684211200, "step": 1085700 }, { "loss": 2.8125, "learning_rate": 1.2739129905787786e-06, "epoch": 4.368168450865145, "total_flos": 4168316975742382080, "step": 1085800 }, { "loss": 2.8175, "learning_rate": 1.273101865182848e-06, "epoch": 4.36857075040934, "total_flos": 4168707213954723840, "step": 1085900 }, { "loss": 2.8225, "learning_rate": 1.2722907397869174e-06, "epoch": 4.368973049953534, "total_flos": 4169100681402347520, "step": 1086000 }, { "loss": 2.8125, "learning_rate": 1.2714796143909868e-06, "epoch": 4.369375349497729, "total_flos": 4169485693352325120, "step": 1086100 }, { "loss": 2.8175, "learning_rate": 1.2706684889950562e-06, "epoch": 4.369777649041923, "total_flos": 4169880339895726080, "step": 1086200 }, { "loss": 2.7725, "learning_rate": 1.2698573635991256e-06, "epoch": 4.370179948586118, "total_flos": 4170254766539919360, "step": 1086300 }, { "loss": 2.8225, "learning_rate": 1.269046238203195e-06, "epoch": 4.3705822481303125, "total_flos": 4170630595352064000, "step": 1086400 }, { "loss": 2.765, "learning_rate": 1.2682351128072646e-06, "epoch": 4.3709845476745075, "total_flos": 4171007098691973120, "step": 1086500 }, { "loss": 2.8025, "learning_rate": 1.267423987411334e-06, "epoch": 4.371386847218702, "total_flos": 4171404432723947520, "step": 1086600 }, { "loss": 2.8075, "learning_rate": 1.2666128620154034e-06, "epoch": 4.371789146762897, "total_flos": 4171787601672867840, "step": 1086700 }, { "loss": 2.7775, "learning_rate": 1.2658017366194729e-06, "epoch": 4.372191446307092, "total_flos": 4172169240984023040, "step": 1086800 }, { "loss": 2.7725, "learning_rate": 1.2649906112235423e-06, "epoch": 4.372593745851286, "total_flos": 4172562108261273600, "step": 1086900 }, { "loss": 2.795, "learning_rate": 1.2641794858276117e-06, "epoch": 4.372996045395481, "total_flos": 4172936163118510080, "step": 1087000 }, { "loss": 2.805, "learning_rate": 1.263368360431681e-06, "epoch": 4.373398344939675, "total_flos": 4173321116644823040, "step": 1087100 }, { "loss": 2.8325, "learning_rate": 1.2625572350357505e-06, "epoch": 4.37380064448387, "total_flos": 4173723953123758080, "step": 1087200 }, { "loss": 2.7975, "learning_rate": 1.2617461096398199e-06, "epoch": 4.374202944028064, "total_flos": 4174116283965542400, "step": 1087300 }, { "loss": 2.8275, "learning_rate": 1.2609349842438893e-06, "epoch": 4.374605243572259, "total_flos": 4174501763304837120, "step": 1087400 }, { "loss": 2.8025, "learning_rate": 1.2601238588479587e-06, "epoch": 4.375007543116453, "total_flos": 4174891911226060800, "step": 1087500 }, { "loss": 2.72, "learning_rate": 1.2593127334520281e-06, "epoch": 4.375409842660648, "total_flos": 4175266969908080640, "step": 1087600 }, { "loss": 2.82, "learning_rate": 1.2585016080560975e-06, "epoch": 4.375812142204843, "total_flos": 4175642750919045120, "step": 1087700 }, { "loss": 2.8125, "learning_rate": 1.257690482660167e-06, "epoch": 4.376214441749037, "total_flos": 4176028702958899200, "step": 1087800 }, { "loss": 2.795, "learning_rate": 1.2568793572642363e-06, "epoch": 4.376616741293232, "total_flos": 4176421469322547200, "step": 1087900 }, { "loss": 2.7825, "learning_rate": 1.2560682318683057e-06, "epoch": 4.3770190408374265, "total_flos": 4176808627014389760, "step": 1088000 }, { "loss": 2.8025, "learning_rate": 1.2552571064723751e-06, "epoch": 4.377421340381622, "total_flos": 4177183696318894080, "step": 1088100 }, { "loss": 2.81, "learning_rate": 1.2544459810764446e-06, "epoch": 4.377823639925816, "total_flos": 4177564926664396800, "step": 1088200 }, { "loss": 2.81, "learning_rate": 1.253634855680514e-06, "epoch": 4.378225939470011, "total_flos": 4177932751434485760, "step": 1088300 }, { "loss": 2.8375, "learning_rate": 1.2528237302845834e-06, "epoch": 4.378628239014205, "total_flos": 4178333845825966080, "step": 1088400 }, { "loss": 2.705, "learning_rate": 1.2520126048886528e-06, "epoch": 4.3790305385584, "total_flos": 4178709775551713280, "step": 1088500 }, { "loss": 2.7875, "learning_rate": 1.2512014794927224e-06, "epoch": 4.379432838102595, "total_flos": 4179107624774184960, "step": 1088600 }, { "loss": 2.8225, "learning_rate": 1.2503903540967918e-06, "epoch": 4.379835137646789, "total_flos": 4179494660307456000, "step": 1088700 }, { "loss": 2.7925, "learning_rate": 1.249579228700861e-06, "epoch": 4.380237437190984, "total_flos": 4179872130293452800, "step": 1088800 }, { "loss": 2.77, "learning_rate": 1.2487681033049304e-06, "epoch": 4.380639736735178, "total_flos": 4180259447322562560, "step": 1088900 }, { "loss": 2.8175, "learning_rate": 1.2479569779089998e-06, "epoch": 4.381042036279373, "total_flos": 4180641962988687360, "step": 1089000 }, { "loss": 2.7875, "learning_rate": 1.2471458525130692e-06, "epoch": 4.381444335823567, "total_flos": 4181012507114803200, "step": 1089100 }, { "loss": 2.765, "learning_rate": 1.2463347271171386e-06, "epoch": 4.381846635367762, "total_flos": 4181386413257256960, "step": 1089200 }, { "loss": 2.8225, "learning_rate": 1.2455236017212083e-06, "epoch": 4.382248934911956, "total_flos": 4181771749193011200, "step": 1089300 }, { "loss": 2.885, "learning_rate": 1.2447124763252777e-06, "epoch": 4.3826512344561515, "total_flos": 4182163198368583680, "step": 1089400 }, { "loss": 2.825, "learning_rate": 1.243901350929347e-06, "epoch": 4.383053534000346, "total_flos": 4182542203303587840, "step": 1089500 }, { "loss": 2.815, "learning_rate": 1.2430902255334165e-06, "epoch": 4.383455833544541, "total_flos": 4182909321678458880, "step": 1089600 }, { "loss": 2.8125, "learning_rate": 1.2422791001374859e-06, "epoch": 4.383858133088736, "total_flos": 4183275090997800960, "step": 1089700 }, { "loss": 2.7825, "learning_rate": 1.2414679747415553e-06, "epoch": 4.38426043263293, "total_flos": 4183649565443174400, "step": 1089800 }, { "loss": 2.8075, "learning_rate": 1.2406568493456247e-06, "epoch": 4.384662732177125, "total_flos": 4184041683835269120, "step": 1089900 }, { "loss": 2.7975, "learning_rate": 1.2398457239496941e-06, "epoch": 4.385065031721319, "total_flos": 4184416646914928640, "step": 1090000 }, { "loss": 2.8375, "learning_rate": 1.2390345985537635e-06, "epoch": 4.385467331265514, "total_flos": 4184808690949632000, "step": 1090100 }, { "loss": 2.8025, "learning_rate": 1.238223473157833e-06, "epoch": 4.385869630809708, "total_flos": 4185192608783708160, "step": 1090200 }, { "loss": 2.815, "learning_rate": 1.2374123477619023e-06, "epoch": 4.386271930353903, "total_flos": 4185576271678156800, "step": 1090300 }, { "loss": 2.8, "learning_rate": 1.2366012223659717e-06, "epoch": 4.386674229898097, "total_flos": 4185958298709995520, "step": 1090400 }, { "loss": 2.8075, "learning_rate": 1.2357900969700411e-06, "epoch": 4.387076529442292, "total_flos": 4186352445996625920, "step": 1090500 }, { "loss": 2.7675, "learning_rate": 1.2349789715741106e-06, "epoch": 4.387478828986486, "total_flos": 4186740171991388160, "step": 1090600 }, { "loss": 2.7825, "learning_rate": 1.23416784617818e-06, "epoch": 4.387881128530681, "total_flos": 4187125683198136320, "step": 1090700 }, { "loss": 2.8025, "learning_rate": 1.2333567207822494e-06, "epoch": 4.388283428074876, "total_flos": 4187518401760604160, "step": 1090800 }, { "loss": 2.825, "learning_rate": 1.2325455953863188e-06, "epoch": 4.3886857276190705, "total_flos": 4187920452175687680, "step": 1090900 }, { "loss": 2.775, "learning_rate": 1.2317344699903882e-06, "epoch": 4.3890880271632655, "total_flos": 4188313494723932160, "step": 1091000 }, { "loss": 2.7425, "learning_rate": 1.2309233445944576e-06, "epoch": 4.38949032670746, "total_flos": 4188689774991667200, "step": 1091100 }, { "loss": 2.7925, "learning_rate": 1.230112219198527e-06, "epoch": 4.389892626251655, "total_flos": 4189052803710013440, "step": 1091200 }, { "loss": 2.75, "learning_rate": 1.2293010938025964e-06, "epoch": 4.390294925795849, "total_flos": 4189437135820984320, "step": 1091300 }, { "loss": 2.8175, "learning_rate": 1.2284899684066658e-06, "epoch": 4.390697225340044, "total_flos": 4189819603685928960, "step": 1091400 }, { "loss": 2.7175, "learning_rate": 1.2276788430107352e-06, "epoch": 4.391099524884238, "total_flos": 4190218557646786560, "step": 1091500 }, { "loss": 2.7975, "learning_rate": 1.2268677176148046e-06, "epoch": 4.391501824428433, "total_flos": 4190611770154782720, "step": 1091600 }, { "loss": 2.8325, "learning_rate": 1.226056592218874e-06, "epoch": 4.391904123972628, "total_flos": 4190985686919720960, "step": 1091700 }, { "loss": 2.8175, "learning_rate": 1.2252454668229434e-06, "epoch": 4.392306423516822, "total_flos": 4191379234035978240, "step": 1091800 }, { "loss": 2.8275, "learning_rate": 1.2244343414270129e-06, "epoch": 4.392708723061017, "total_flos": 4191771272759439360, "step": 1091900 }, { "loss": 2.8075, "learning_rate": 1.2236232160310823e-06, "epoch": 4.393111022605211, "total_flos": 4192146867876925440, "step": 1092000 }, { "loss": 2.7875, "learning_rate": 1.2228120906351519e-06, "epoch": 4.393513322149406, "total_flos": 4192529696906342400, "step": 1092100 }, { "loss": 2.72, "learning_rate": 1.2220009652392213e-06, "epoch": 4.3939156216936, "total_flos": 4192917651284520960, "step": 1092200 }, { "loss": 2.7525, "learning_rate": 1.2211898398432907e-06, "epoch": 4.394317921237795, "total_flos": 4193292975528652800, "step": 1092300 }, { "loss": 2.85, "learning_rate": 1.22037871444736e-06, "epoch": 4.3947202207819895, "total_flos": 4193669006168002560, "step": 1092400 }, { "loss": 2.7925, "learning_rate": 1.2195675890514295e-06, "epoch": 4.395122520326185, "total_flos": 4194055271571148800, "step": 1092500 }, { "loss": 2.8075, "learning_rate": 1.218756463655499e-06, "epoch": 4.395524819870379, "total_flos": 4194434850120314880, "step": 1092600 }, { "loss": 2.8025, "learning_rate": 1.2179453382595683e-06, "epoch": 4.395927119414574, "total_flos": 4194814864191344640, "step": 1092700 }, { "loss": 2.78, "learning_rate": 1.2171342128636377e-06, "epoch": 4.396329418958769, "total_flos": 4195211162531082240, "step": 1092800 }, { "loss": 2.805, "learning_rate": 1.2163230874677071e-06, "epoch": 4.396731718502963, "total_flos": 4195586683291176960, "step": 1092900 }, { "loss": 2.775, "learning_rate": 1.2155119620717765e-06, "epoch": 4.397134018047158, "total_flos": 4195967191307735040, "step": 1093000 }, { "loss": 2.7775, "learning_rate": 1.214700836675846e-06, "epoch": 4.397536317591352, "total_flos": 4196360849960079360, "step": 1093100 }, { "loss": 2.745, "learning_rate": 1.2138897112799154e-06, "epoch": 4.397938617135547, "total_flos": 4196746100915957760, "step": 1093200 }, { "loss": 2.7625, "learning_rate": 1.2130785858839848e-06, "epoch": 4.398340916679741, "total_flos": 4197121488894996480, "step": 1093300 }, { "loss": 2.7825, "learning_rate": 1.2122674604880542e-06, "epoch": 4.398743216223936, "total_flos": 4197500738147143680, "step": 1093400 }, { "loss": 2.775, "learning_rate": 1.2114563350921236e-06, "epoch": 4.39914551576813, "total_flos": 4197879626234818560, "step": 1093500 }, { "loss": 2.8275, "learning_rate": 1.210645209696193e-06, "epoch": 4.399547815312325, "total_flos": 4198243499440680960, "step": 1093600 }, { "loss": 2.78, "learning_rate": 1.2098340843002624e-06, "epoch": 4.399950114856519, "total_flos": 4198611499481763840, "step": 1093700 }, { "loss": 2.8225, "learning_rate": 1.2090229589043318e-06, "epoch": 4.4003524144007145, "total_flos": 4198977916772659200, "step": 1093800 }, { "loss": 2.7925, "learning_rate": 1.2082118335084012e-06, "epoch": 4.4007547139449095, "total_flos": 4199370146700840960, "step": 1093900 }, { "loss": 2.775, "learning_rate": 1.2074007081124706e-06, "epoch": 4.401157013489104, "total_flos": 4199778108528537600, "step": 1094000 }, { "loss": 2.735, "learning_rate": 1.20658958271654e-06, "epoch": 4.401559313033299, "total_flos": 4200156051215093760, "step": 1094100 }, { "loss": 2.855, "learning_rate": 1.2057784573206094e-06, "epoch": 4.401961612577493, "total_flos": 4200536936329850880, "step": 1094200 }, { "loss": 2.725, "learning_rate": 1.2049673319246788e-06, "epoch": 4.402363912121688, "total_flos": 4200924444563681280, "step": 1094300 }, { "loss": 2.75, "learning_rate": 1.2041562065287483e-06, "epoch": 4.402766211665882, "total_flos": 4201314077294407680, "step": 1094400 }, { "loss": 2.77, "learning_rate": 1.2033450811328179e-06, "epoch": 4.403168511210077, "total_flos": 4201702791180226560, "step": 1094500 }, { "loss": 2.795, "learning_rate": 1.2025339557368873e-06, "epoch": 4.403570810754271, "total_flos": 4202085051906723840, "step": 1094600 }, { "loss": 2.7775, "learning_rate": 1.2017228303409567e-06, "epoch": 4.403973110298466, "total_flos": 4202472719477821440, "step": 1094700 }, { "loss": 2.775, "learning_rate": 1.200911704945026e-06, "epoch": 4.404375409842661, "total_flos": 4202882646465146880, "step": 1094800 }, { "loss": 2.7425, "learning_rate": 1.2001005795490955e-06, "epoch": 4.404777709386855, "total_flos": 4203271928653885440, "step": 1094900 }, { "loss": 2.83, "learning_rate": 1.199289454153165e-06, "epoch": 4.40518000893105, "total_flos": 4203653583898767360, "step": 1095000 }, { "loss": 2.7475, "learning_rate": 1.1984783287572343e-06, "epoch": 4.405582308475244, "total_flos": 4204028254860103680, "step": 1095100 }, { "loss": 2.8075, "learning_rate": 1.1976672033613037e-06, "epoch": 4.405984608019439, "total_flos": 4204412395766353920, "step": 1095200 }, { "loss": 2.775, "learning_rate": 1.1968560779653731e-06, "epoch": 4.4063869075636335, "total_flos": 4204797428961300480, "step": 1095300 }, { "loss": 2.785, "learning_rate": 1.1960449525694425e-06, "epoch": 4.4067892071078285, "total_flos": 4205164218039152640, "step": 1095400 }, { "loss": 2.7925, "learning_rate": 1.195233827173512e-06, "epoch": 4.407191506652023, "total_flos": 4205540386770800640, "step": 1095500 }, { "loss": 2.8575, "learning_rate": 1.1944227017775814e-06, "epoch": 4.407593806196218, "total_flos": 4205924001864069120, "step": 1095600 }, { "loss": 2.775, "learning_rate": 1.1936115763816508e-06, "epoch": 4.407996105740412, "total_flos": 4206307409818890240, "step": 1095700 }, { "loss": 2.8325, "learning_rate": 1.1928004509857202e-06, "epoch": 4.408398405284607, "total_flos": 4206694216968744960, "step": 1095800 }, { "loss": 2.8, "learning_rate": 1.1919893255897896e-06, "epoch": 4.408800704828802, "total_flos": 4207067581364490240, "step": 1095900 }, { "loss": 2.755, "learning_rate": 1.191178200193859e-06, "epoch": 4.409203004372996, "total_flos": 4207442910919864320, "step": 1096000 }, { "loss": 2.7575, "learning_rate": 1.1903670747979284e-06, "epoch": 4.409605303917191, "total_flos": 4207823418936422400, "step": 1096100 }, { "loss": 2.805, "learning_rate": 1.1895559494019978e-06, "epoch": 4.410007603461385, "total_flos": 4208194302982041600, "step": 1096200 }, { "loss": 2.7875, "learning_rate": 1.1887448240060672e-06, "epoch": 4.41040990300558, "total_flos": 4208575820134625280, "step": 1096300 }, { "loss": 2.7775, "learning_rate": 1.1879336986101366e-06, "epoch": 4.410812202549774, "total_flos": 4208966286730383360, "step": 1096400 }, { "loss": 2.8, "learning_rate": 1.187122573214206e-06, "epoch": 4.411214502093969, "total_flos": 4209371322063605760, "step": 1096500 }, { "loss": 2.7825, "learning_rate": 1.1863114478182754e-06, "epoch": 4.411616801638163, "total_flos": 4209749811808112640, "step": 1096600 }, { "loss": 2.765, "learning_rate": 1.1855003224223448e-06, "epoch": 4.412019101182358, "total_flos": 4210123585169510400, "step": 1096700 }, { "loss": 2.75, "learning_rate": 1.1846891970264142e-06, "epoch": 4.4124214007265525, "total_flos": 4210507943836692480, "step": 1096800 }, { "loss": 2.77, "learning_rate": 1.1838780716304839e-06, "epoch": 4.4128237002707476, "total_flos": 4210871046912430080, "step": 1096900 }, { "loss": 2.815, "learning_rate": 1.1830669462345533e-06, "epoch": 4.413225999814943, "total_flos": 4211246307421655040, "step": 1097000 }, { "loss": 2.8525, "learning_rate": 1.1822558208386227e-06, "epoch": 4.413628299359137, "total_flos": 4211628754041630720, "step": 1097100 }, { "loss": 2.8075, "learning_rate": 1.181444695442692e-06, "epoch": 4.414030598903332, "total_flos": 4212023724570808320, "step": 1097200 }, { "loss": 2.775, "learning_rate": 1.1806335700467615e-06, "epoch": 4.414432898447526, "total_flos": 4212398374287175680, "step": 1097300 }, { "loss": 2.8125, "learning_rate": 1.179822444650831e-06, "epoch": 4.414835197991721, "total_flos": 4212779349693050880, "step": 1097400 }, { "loss": 2.8075, "learning_rate": 1.1790113192549003e-06, "epoch": 4.415237497535915, "total_flos": 4213150063778918400, "step": 1097500 }, { "loss": 2.8025, "learning_rate": 1.1782001938589697e-06, "epoch": 4.41563979708011, "total_flos": 4213535293489827840, "step": 1097600 }, { "loss": 2.795, "learning_rate": 1.1773890684630391e-06, "epoch": 4.416042096624304, "total_flos": 4213917363011604480, "step": 1097700 }, { "loss": 2.7775, "learning_rate": 1.1765779430671085e-06, "epoch": 4.416444396168499, "total_flos": 4214302598033756160, "step": 1097800 }, { "loss": 2.7825, "learning_rate": 1.175766817671178e-06, "epoch": 4.416846695712694, "total_flos": 4214701573239582720, "step": 1097900 }, { "loss": 2.7375, "learning_rate": 1.1749556922752474e-06, "epoch": 4.417248995256888, "total_flos": 4215091853941862400, "step": 1098000 }, { "loss": 2.8325, "learning_rate": 1.1741445668793168e-06, "epoch": 4.417651294801083, "total_flos": 4215486803226071040, "step": 1098100 }, { "loss": 2.765, "learning_rate": 1.1733334414833862e-06, "epoch": 4.4180535943452774, "total_flos": 4215859716166225920, "step": 1098200 }, { "loss": 2.7875, "learning_rate": 1.1725223160874556e-06, "epoch": 4.4184558938894725, "total_flos": 4216225527975505920, "step": 1098300 }, { "loss": 2.7975, "learning_rate": 1.171711190691525e-06, "epoch": 4.418858193433667, "total_flos": 4216621390793379840, "step": 1098400 }, { "loss": 2.7875, "learning_rate": 1.1709000652955944e-06, "epoch": 4.419260492977862, "total_flos": 4217019760517591040, "step": 1098500 }, { "loss": 2.7425, "learning_rate": 1.1700889398996638e-06, "epoch": 4.419662792522056, "total_flos": 4217413960916643840, "step": 1098600 }, { "loss": 2.75, "learning_rate": 1.1692778145037332e-06, "epoch": 4.420065092066251, "total_flos": 4217792811825623040, "step": 1098700 }, { "loss": 2.745, "learning_rate": 1.1684666891078026e-06, "epoch": 4.420467391610446, "total_flos": 4218175200021934080, "step": 1098800 }, { "loss": 2.82, "learning_rate": 1.167655563711872e-06, "epoch": 4.42086969115464, "total_flos": 4218554852928491520, "step": 1098900 }, { "loss": 2.78, "learning_rate": 1.1668444383159414e-06, "epoch": 4.421271990698835, "total_flos": 4218948219462512640, "step": 1099000 }, { "loss": 2.775, "learning_rate": 1.1660333129200108e-06, "epoch": 4.421674290243029, "total_flos": 4219346015572561920, "step": 1099100 }, { "loss": 2.7275, "learning_rate": 1.1652221875240802e-06, "epoch": 4.422076589787224, "total_flos": 4219730289259868160, "step": 1099200 }, { "loss": 2.8175, "learning_rate": 1.1644110621281499e-06, "epoch": 4.422478889331418, "total_flos": 4220110717607792640, "step": 1099300 }, { "loss": 2.7925, "learning_rate": 1.1635999367322193e-06, "epoch": 4.422881188875613, "total_flos": 4220494460170874880, "step": 1099400 }, { "loss": 2.825, "learning_rate": 1.1627888113362887e-06, "epoch": 4.423283488419807, "total_flos": 4220885627850608640, "step": 1099500 }, { "loss": 2.785, "learning_rate": 1.161977685940358e-06, "epoch": 4.423685787964002, "total_flos": 4221259279053434880, "step": 1099600 }, { "loss": 2.765, "learning_rate": 1.1611665605444275e-06, "epoch": 4.4240880875081965, "total_flos": 4221672573368340480, "step": 1099700 }, { "loss": 2.745, "learning_rate": 1.160355435148497e-06, "epoch": 4.4244903870523915, "total_flos": 4222049374137815040, "step": 1099800 }, { "loss": 2.7875, "learning_rate": 1.1595443097525663e-06, "epoch": 4.424892686596586, "total_flos": 4222444360600719360, "step": 1099900 }, { "loss": 2.77, "learning_rate": 1.1587331843566357e-06, "epoch": 4.425294986140781, "total_flos": 4222826961246720000, "step": 1100000 }, { "loss": 2.745, "learning_rate": 1.1579220589607051e-06, "epoch": 4.425697285684976, "total_flos": 4223206502617190400, "step": 1100100 }, { "loss": 2.795, "learning_rate": 1.1571109335647745e-06, "epoch": 4.42609958522917, "total_flos": 4223590627589713920, "step": 1100200 }, { "loss": 2.76, "learning_rate": 1.156299808168844e-06, "epoch": 4.426501884773365, "total_flos": 4223963492728688640, "step": 1100300 }, { "loss": 2.7375, "learning_rate": 1.1554886827729133e-06, "epoch": 4.426904184317559, "total_flos": 4224340888357294080, "step": 1100400 }, { "loss": 2.8025, "learning_rate": 1.1546775573769828e-06, "epoch": 4.427306483861754, "total_flos": 4224730319260815360, "step": 1100500 }, { "loss": 2.8, "learning_rate": 1.1538664319810522e-06, "epoch": 4.427708783405948, "total_flos": 4225114231783649280, "step": 1100600 }, { "loss": 2.815, "learning_rate": 1.1530553065851216e-06, "epoch": 4.428111082950143, "total_flos": 4225483835819888640, "step": 1100700 }, { "loss": 2.78, "learning_rate": 1.152244181189191e-06, "epoch": 4.428513382494337, "total_flos": 4225856286681968640, "step": 1100800 }, { "loss": 2.7925, "learning_rate": 1.1514330557932604e-06, "epoch": 4.428915682038532, "total_flos": 4226233087451443200, "step": 1100900 }, { "loss": 2.77, "learning_rate": 1.1506219303973298e-06, "epoch": 4.429317981582727, "total_flos": 4226614710828871680, "step": 1101000 }, { "loss": 2.7425, "learning_rate": 1.1498108050013992e-06, "epoch": 4.429720281126921, "total_flos": 4226985042505297920, "step": 1101100 }, { "loss": 2.77, "learning_rate": 1.1489996796054686e-06, "epoch": 4.430122580671116, "total_flos": 4227360998787256320, "step": 1101200 }, { "loss": 2.76, "learning_rate": 1.148188554209538e-06, "epoch": 4.4305248802153105, "total_flos": 4227745373388165120, "step": 1101300 }, { "loss": 2.7925, "learning_rate": 1.1473774288136074e-06, "epoch": 4.430927179759506, "total_flos": 4228127820008140800, "step": 1101400 }, { "loss": 2.81, "learning_rate": 1.1465663034176768e-06, "epoch": 4.4313294793037, "total_flos": 4228508869771407360, "step": 1101500 }, { "loss": 2.855, "learning_rate": 1.1457551780217464e-06, "epoch": 4.431731778847895, "total_flos": 4228887646322995200, "step": 1101600 }, { "loss": 2.7475, "learning_rate": 1.1449440526258159e-06, "epoch": 4.432134078392089, "total_flos": 4229259034936627200, "step": 1101700 }, { "loss": 2.7675, "learning_rate": 1.1441329272298853e-06, "epoch": 4.432536377936284, "total_flos": 4229646904334929920, "step": 1101800 }, { "loss": 2.82, "learning_rate": 1.1433218018339547e-06, "epoch": 4.432938677480479, "total_flos": 4230017963651543040, "step": 1101900 }, { "loss": 2.76, "learning_rate": 1.142510676438024e-06, "epoch": 4.433340977024673, "total_flos": 4230404653954068480, "step": 1102000 }, { "loss": 2.7775, "learning_rate": 1.1416995510420935e-06, "epoch": 4.433743276568868, "total_flos": 4230796506784051200, "step": 1102100 }, { "loss": 2.7325, "learning_rate": 1.1408884256461629e-06, "epoch": 4.434145576113062, "total_flos": 4231185597768069120, "step": 1102200 }, { "loss": 2.805, "learning_rate": 1.1400773002502323e-06, "epoch": 4.434547875657257, "total_flos": 4231561580606238720, "step": 1102300 }, { "loss": 2.8225, "learning_rate": 1.1392661748543017e-06, "epoch": 4.434950175201451, "total_flos": 4231944855780003840, "step": 1102400 }, { "loss": 2.81, "learning_rate": 1.1384550494583711e-06, "epoch": 4.435352474745646, "total_flos": 4232332353391349760, "step": 1102500 }, { "loss": 2.755, "learning_rate": 1.1376439240624405e-06, "epoch": 4.43575477428984, "total_flos": 4232728152474316800, "step": 1102600 }, { "loss": 2.795, "learning_rate": 1.13683279866651e-06, "epoch": 4.4361570738340355, "total_flos": 4233126564688465920, "step": 1102700 }, { "loss": 2.775, "learning_rate": 1.1360216732705793e-06, "epoch": 4.43655937337823, "total_flos": 4233527297915473920, "step": 1102800 }, { "loss": 2.8275, "learning_rate": 1.1352105478746487e-06, "epoch": 4.436961672922425, "total_flos": 4233918338125393920, "step": 1102900 }, { "loss": 2.8525, "learning_rate": 1.1343994224787182e-06, "epoch": 4.437363972466619, "total_flos": 4234300253621145600, "step": 1103000 }, { "loss": 2.75, "learning_rate": 1.1335882970827876e-06, "epoch": 4.437766272010814, "total_flos": 4234689615478517760, "step": 1103100 }, { "loss": 2.76, "learning_rate": 1.132777171686857e-06, "epoch": 4.438168571555009, "total_flos": 4235078265629429760, "step": 1103200 }, { "loss": 2.8275, "learning_rate": 1.1319660462909264e-06, "epoch": 4.438570871099203, "total_flos": 4235470511491338240, "step": 1103300 }, { "loss": 2.8175, "learning_rate": 1.1311549208949958e-06, "epoch": 4.438973170643398, "total_flos": 4235846451839569920, "step": 1103400 }, { "loss": 2.7825, "learning_rate": 1.1303437954990652e-06, "epoch": 4.439375470187592, "total_flos": 4236211514763694080, "step": 1103500 }, { "loss": 2.805, "learning_rate": 1.1295326701031346e-06, "epoch": 4.439777769731787, "total_flos": 4236609974779023360, "step": 1103600 }, { "loss": 2.8175, "learning_rate": 1.128721544707204e-06, "epoch": 4.440180069275981, "total_flos": 4236998083183226880, "step": 1103700 }, { "loss": 2.83, "learning_rate": 1.1279104193112734e-06, "epoch": 4.440582368820176, "total_flos": 4237401493276323840, "step": 1103800 }, { "loss": 2.825, "learning_rate": 1.1270992939153428e-06, "epoch": 4.44098466836437, "total_flos": 4237814644187688960, "step": 1103900 }, { "loss": 2.8475, "learning_rate": 1.1262881685194124e-06, "epoch": 4.441386967908565, "total_flos": 4238203251848663040, "step": 1104000 }, { "loss": 2.745, "learning_rate": 1.1254770431234819e-06, "epoch": 4.44178926745276, "total_flos": 4238581067065405440, "step": 1104100 }, { "loss": 2.82, "learning_rate": 1.1246659177275513e-06, "epoch": 4.4421915669969545, "total_flos": 4238961580393205760, "step": 1104200 }, { "loss": 2.7525, "learning_rate": 1.1238547923316207e-06, "epoch": 4.4425938665411495, "total_flos": 4239352848986542080, "step": 1104300 }, { "loss": 2.7375, "learning_rate": 1.12304366693569e-06, "epoch": 4.442996166085344, "total_flos": 4239743650190561280, "step": 1104400 }, { "loss": 2.8275, "learning_rate": 1.1222325415397595e-06, "epoch": 4.443398465629539, "total_flos": 4240135375550730240, "step": 1104500 }, { "loss": 2.85, "learning_rate": 1.1214214161438289e-06, "epoch": 4.443800765173733, "total_flos": 4240534796900904960, "step": 1104600 }, { "loss": 2.735, "learning_rate": 1.1206102907478983e-06, "epoch": 4.444203064717928, "total_flos": 4240930659718778880, "step": 1104700 }, { "loss": 2.81, "learning_rate": 1.1197991653519677e-06, "epoch": 4.444605364262122, "total_flos": 4241312468989685760, "step": 1104800 }, { "loss": 2.7825, "learning_rate": 1.1189880399560371e-06, "epoch": 4.445007663806317, "total_flos": 4241692063472578560, "step": 1104900 }, { "loss": 2.7875, "learning_rate": 1.1181769145601065e-06, "epoch": 4.445409963350512, "total_flos": 4242071312724725760, "step": 1105000 }, { "loss": 2.815, "learning_rate": 1.117365789164176e-06, "epoch": 4.445812262894706, "total_flos": 4242448825200660480, "step": 1105100 }, { "loss": 2.8, "learning_rate": 1.1165546637682453e-06, "epoch": 4.446214562438901, "total_flos": 4242831064682188800, "step": 1105200 }, { "loss": 2.795, "learning_rate": 1.1157435383723147e-06, "epoch": 4.446616861983095, "total_flos": 4243214355789680640, "step": 1105300 }, { "loss": 2.7975, "learning_rate": 1.1149324129763841e-06, "epoch": 4.44701916152729, "total_flos": 4243589483517849600, "step": 1105400 }, { "loss": 2.795, "learning_rate": 1.1141212875804536e-06, "epoch": 4.447421461071484, "total_flos": 4243969332940369920, "step": 1105500 }, { "loss": 2.75, "learning_rate": 1.113310162184523e-06, "epoch": 4.447823760615679, "total_flos": 4244344620005806080, "step": 1105600 }, { "loss": 2.76, "learning_rate": 1.1124990367885924e-06, "epoch": 4.4482260601598735, "total_flos": 4244727608372490240, "step": 1105700 }, { "loss": 2.765, "learning_rate": 1.1116879113926618e-06, "epoch": 4.448628359704069, "total_flos": 4245116577197936640, "step": 1105800 }, { "loss": 2.815, "learning_rate": 1.1108767859967312e-06, "epoch": 4.449030659248263, "total_flos": 4245500447230832640, "step": 1105900 }, { "loss": 2.8425, "learning_rate": 1.1100656606008006e-06, "epoch": 4.449432958792458, "total_flos": 4245885538849443840, "step": 1106000 }, { "loss": 2.7975, "learning_rate": 1.10925453520487e-06, "epoch": 4.449835258336652, "total_flos": 4246264862458982400, "step": 1106100 }, { "loss": 2.765, "learning_rate": 1.1084434098089394e-06, "epoch": 4.450237557880847, "total_flos": 4246642555517153280, "step": 1106200 }, { "loss": 2.7175, "learning_rate": 1.1076322844130088e-06, "epoch": 4.450639857425042, "total_flos": 4247015888045445120, "step": 1106300 }, { "loss": 2.79, "learning_rate": 1.1068211590170784e-06, "epoch": 4.451042156969236, "total_flos": 4247398446201507840, "step": 1106400 }, { "loss": 2.84, "learning_rate": 1.1060100336211478e-06, "epoch": 4.451444456513431, "total_flos": 4247795658074910720, "step": 1106500 }, { "loss": 2.81, "learning_rate": 1.1051989082252173e-06, "epoch": 4.451846756057625, "total_flos": 4248185705082531840, "step": 1106600 }, { "loss": 2.81, "learning_rate": 1.1043877828292867e-06, "epoch": 4.45224905560182, "total_flos": 4248579539005870080, "step": 1106700 }, { "loss": 2.8, "learning_rate": 1.103576657433356e-06, "epoch": 4.452651355146014, "total_flos": 4248976442827223040, "step": 1106800 }, { "loss": 2.8325, "learning_rate": 1.1027655320374255e-06, "epoch": 4.453053654690209, "total_flos": 4249352638115082240, "step": 1106900 }, { "loss": 2.78, "learning_rate": 1.1019544066414949e-06, "epoch": 4.453455954234403, "total_flos": 4249752888019046400, "step": 1107000 }, { "loss": 2.7675, "learning_rate": 1.1011432812455643e-06, "epoch": 4.4538582537785985, "total_flos": 4250132078847528960, "step": 1107100 }, { "loss": 2.7475, "learning_rate": 1.1003321558496337e-06, "epoch": 4.4542605533227935, "total_flos": 4250516225065021440, "step": 1107200 }, { "loss": 2.7125, "learning_rate": 1.099521030453703e-06, "epoch": 4.454662852866988, "total_flos": 4250897593502822400, "step": 1107300 }, { "loss": 2.7675, "learning_rate": 1.0987099050577725e-06, "epoch": 4.455065152411183, "total_flos": 4251298597603184640, "step": 1107400 }, { "loss": 2.7725, "learning_rate": 1.097898779661842e-06, "epoch": 4.455467451955377, "total_flos": 4251678462959431680, "step": 1107500 }, { "loss": 2.745, "learning_rate": 1.0970876542659113e-06, "epoch": 4.455869751499572, "total_flos": 4252068393119723520, "step": 1107600 }, { "loss": 2.82, "learning_rate": 1.0962765288699807e-06, "epoch": 4.456272051043766, "total_flos": 4252443977614725120, "step": 1107700 }, { "loss": 2.7625, "learning_rate": 1.0954654034740501e-06, "epoch": 4.456674350587961, "total_flos": 4252831825768058880, "step": 1107800 }, { "loss": 2.76, "learning_rate": 1.0946542780781196e-06, "epoch": 4.457076650132155, "total_flos": 4253218616984186880, "step": 1107900 }, { "loss": 2.81, "learning_rate": 1.093843152682189e-06, "epoch": 4.45747894967635, "total_flos": 4253598423916769280, "step": 1108000 }, { "loss": 2.7925, "learning_rate": 1.0930320272862584e-06, "epoch": 4.457881249220545, "total_flos": 4253996459032719360, "step": 1108100 }, { "loss": 2.785, "learning_rate": 1.0922209018903278e-06, "epoch": 4.458283548764739, "total_flos": 4254385300388352000, "step": 1108200 }, { "loss": 2.795, "learning_rate": 1.0914097764943972e-06, "epoch": 4.458685848308934, "total_flos": 4254770476986839040, "step": 1108300 }, { "loss": 2.7975, "learning_rate": 1.0905986510984666e-06, "epoch": 4.459088147853128, "total_flos": 4255150698196316160, "step": 1108400 }, { "loss": 2.8125, "learning_rate": 1.089787525702536e-06, "epoch": 4.459490447397323, "total_flos": 4255536389985300480, "step": 1108500 }, { "loss": 2.7325, "learning_rate": 1.0889764003066054e-06, "epoch": 4.4598927469415175, "total_flos": 4255911783275581440, "step": 1108600 }, { "loss": 2.775, "learning_rate": 1.088165274910675e-06, "epoch": 4.4602950464857125, "total_flos": 4256288817739714560, "step": 1108700 }, { "loss": 2.7775, "learning_rate": 1.0873541495147444e-06, "epoch": 4.460697346029907, "total_flos": 4256678429225472000, "step": 1108800 }, { "loss": 2.7325, "learning_rate": 1.0865430241188138e-06, "epoch": 4.461099645574102, "total_flos": 4257065204507873280, "step": 1108900 }, { "loss": 2.795, "learning_rate": 1.0857318987228832e-06, "epoch": 4.461501945118296, "total_flos": 4257436885239828480, "step": 1109000 }, { "loss": 2.8325, "learning_rate": 1.0849207733269527e-06, "epoch": 4.461904244662491, "total_flos": 4257813978127626240, "step": 1109100 }, { "loss": 2.8125, "learning_rate": 1.084109647931022e-06, "epoch": 4.462306544206686, "total_flos": 4258192929950208000, "step": 1109200 }, { "loss": 2.78, "learning_rate": 1.0832985225350915e-06, "epoch": 4.46270884375088, "total_flos": 4258575424371363840, "step": 1109300 }, { "loss": 2.845, "learning_rate": 1.0824873971391609e-06, "epoch": 4.463111143295075, "total_flos": 4258972726535884800, "step": 1109400 }, { "loss": 2.7625, "learning_rate": 1.0816762717432303e-06, "epoch": 4.463513442839269, "total_flos": 4259351120678031360, "step": 1109500 }, { "loss": 2.805, "learning_rate": 1.0808651463472997e-06, "epoch": 4.463915742383464, "total_flos": 4259739802696396800, "step": 1109600 }, { "loss": 2.7875, "learning_rate": 1.080054020951369e-06, "epoch": 4.464318041927658, "total_flos": 4260124469415628800, "step": 1109700 }, { "loss": 2.7375, "learning_rate": 1.0792428955554385e-06, "epoch": 4.464720341471853, "total_flos": 4260496766251683840, "step": 1109800 }, { "loss": 2.75, "learning_rate": 1.078431770159508e-06, "epoch": 4.465122641016047, "total_flos": 4260874937321656320, "step": 1109900 }, { "loss": 2.8125, "learning_rate": 1.0776206447635773e-06, "epoch": 4.465524940560242, "total_flos": 4261262604892753920, "step": 1110000 }, { "loss": 2.815, "learning_rate": 1.0768095193676467e-06, "epoch": 4.4659272401044365, "total_flos": 4261648785316024320, "step": 1110100 }, { "loss": 2.7825, "learning_rate": 1.0759983939717161e-06, "epoch": 4.4663295396486316, "total_flos": 4262013635790458880, "step": 1110200 }, { "loss": 2.79, "learning_rate": 1.0751872685757855e-06, "epoch": 4.466731839192827, "total_flos": 4262416775010201600, "step": 1110300 }, { "loss": 2.7425, "learning_rate": 1.074376143179855e-06, "epoch": 4.467134138737021, "total_flos": 4262794430889676800, "step": 1110400 }, { "loss": 2.78, "learning_rate": 1.0735650177839244e-06, "epoch": 4.467536438281216, "total_flos": 4263174901727539200, "step": 1110500 }, { "loss": 2.8225, "learning_rate": 1.0727538923879938e-06, "epoch": 4.46793873782541, "total_flos": 4263561491116462080, "step": 1110600 }, { "loss": 2.7675, "learning_rate": 1.0719427669920632e-06, "epoch": 4.468341037369605, "total_flos": 4263970605483724800, "step": 1110700 }, { "loss": 2.74, "learning_rate": 1.0711316415961326e-06, "epoch": 4.468743336913799, "total_flos": 4264359197210972160, "step": 1110800 }, { "loss": 2.74, "learning_rate": 1.070320516200202e-06, "epoch": 4.469145636457994, "total_flos": 4264731016035225600, "step": 1110900 }, { "loss": 2.8375, "learning_rate": 1.0695093908042714e-06, "epoch": 4.469547936002188, "total_flos": 4265111826792591360, "step": 1111000 }, { "loss": 2.7625, "learning_rate": 1.068698265408341e-06, "epoch": 4.469950235546383, "total_flos": 4265481887595663360, "step": 1111100 }, { "loss": 2.8025, "learning_rate": 1.0678871400124104e-06, "epoch": 4.470352535090578, "total_flos": 4265860770372096000, "step": 1111200 }, { "loss": 2.8525, "learning_rate": 1.0670760146164798e-06, "epoch": 4.470754834634772, "total_flos": 4266248554790522880, "step": 1111300 }, { "loss": 2.7775, "learning_rate": 1.0662648892205492e-06, "epoch": 4.471157134178967, "total_flos": 4266636344520192000, "step": 1111400 }, { "loss": 2.795, "learning_rate": 1.0654537638246186e-06, "epoch": 4.4715594337231614, "total_flos": 4267027028876881920, "step": 1111500 }, { "loss": 2.8, "learning_rate": 1.064642638428688e-06, "epoch": 4.4719617332673565, "total_flos": 4267404690067599360, "step": 1111600 }, { "loss": 2.79, "learning_rate": 1.0638315130327575e-06, "epoch": 4.472364032811551, "total_flos": 4267802279039201280, "step": 1111700 }, { "loss": 2.825, "learning_rate": 1.0630203876368269e-06, "epoch": 4.472766332355746, "total_flos": 4268192602231418880, "step": 1111800 }, { "loss": 2.7875, "learning_rate": 1.0622092622408963e-06, "epoch": 4.47316863189994, "total_flos": 4268562588677099520, "step": 1111900 }, { "loss": 2.8175, "learning_rate": 1.0613981368449657e-06, "epoch": 4.473570931444135, "total_flos": 4268943441924403200, "step": 1112000 }, { "loss": 2.7875, "learning_rate": 1.060587011449035e-06, "epoch": 4.473973230988329, "total_flos": 4269325086546800640, "step": 1112100 }, { "loss": 2.77, "learning_rate": 1.0597758860531045e-06, "epoch": 4.474375530532524, "total_flos": 4269705302445035520, "step": 1112200 }, { "loss": 2.82, "learning_rate": 1.058964760657174e-06, "epoch": 4.474777830076719, "total_flos": 4270090500288491520, "step": 1112300 }, { "loss": 2.815, "learning_rate": 1.0581536352612433e-06, "epoch": 4.475180129620913, "total_flos": 4270476643533066240, "step": 1112400 }, { "loss": 2.7725, "learning_rate": 1.0573425098653127e-06, "epoch": 4.475582429165108, "total_flos": 4270867099506339840, "step": 1112500 }, { "loss": 2.7725, "learning_rate": 1.0565313844693821e-06, "epoch": 4.475984728709302, "total_flos": 4271244303930224640, "step": 1112600 }, { "loss": 2.795, "learning_rate": 1.0557202590734515e-06, "epoch": 4.476387028253497, "total_flos": 4271633825124864000, "step": 1112700 }, { "loss": 2.7925, "learning_rate": 1.054909133677521e-06, "epoch": 4.476789327797691, "total_flos": 4272026060364288000, "step": 1112800 }, { "loss": 2.7775, "learning_rate": 1.0540980082815904e-06, "epoch": 4.477191627341886, "total_flos": 4272442440510935040, "step": 1112900 }, { "loss": 2.7725, "learning_rate": 1.0532868828856598e-06, "epoch": 4.4775939268860805, "total_flos": 4272843269340303360, "step": 1113000 }, { "loss": 2.78, "learning_rate": 1.0524757574897292e-06, "epoch": 4.4779962264302755, "total_flos": 4273222816022016000, "step": 1113100 }, { "loss": 2.745, "learning_rate": 1.0516646320937986e-06, "epoch": 4.47839852597447, "total_flos": 4273582796087316480, "step": 1113200 }, { "loss": 2.84, "learning_rate": 1.050853506697868e-06, "epoch": 4.478800825518665, "total_flos": 4273958157510144000, "step": 1113300 }, { "loss": 2.765, "learning_rate": 1.0500423813019374e-06, "epoch": 4.47920312506286, "total_flos": 4274336158620364800, "step": 1113400 }, { "loss": 2.825, "learning_rate": 1.049231255906007e-06, "epoch": 4.479605424607054, "total_flos": 4274719837448540160, "step": 1113500 }, { "loss": 2.7975, "learning_rate": 1.0484201305100764e-06, "epoch": 4.480007724151249, "total_flos": 4275118786098155520, "step": 1113600 }, { "loss": 2.7825, "learning_rate": 1.0476090051141458e-06, "epoch": 4.480410023695443, "total_flos": 4275496622559866880, "step": 1113700 }, { "loss": 2.7675, "learning_rate": 1.0467978797182152e-06, "epoch": 4.480812323239638, "total_flos": 4275870385298780160, "step": 1113800 }, { "loss": 2.755, "learning_rate": 1.0459867543222846e-06, "epoch": 4.481214622783832, "total_flos": 4276236871635824640, "step": 1113900 }, { "loss": 2.79, "learning_rate": 1.045175628926354e-06, "epoch": 4.481616922328027, "total_flos": 4276612472064552960, "step": 1114000 }, { "loss": 2.7525, "learning_rate": 1.0443645035304235e-06, "epoch": 4.482019221872221, "total_flos": 4277006858357084160, "step": 1114100 }, { "loss": 2.7575, "learning_rate": 1.0435533781344929e-06, "epoch": 4.482421521416416, "total_flos": 4277392210226565120, "step": 1114200 }, { "loss": 2.815, "learning_rate": 1.0427422527385623e-06, "epoch": 4.482823820960611, "total_flos": 4277774014186229760, "step": 1114300 }, { "loss": 2.7875, "learning_rate": 1.0419311273426317e-06, "epoch": 4.483226120504805, "total_flos": 4278172229884416000, "step": 1114400 }, { "loss": 2.82, "learning_rate": 1.041120001946701e-06, "epoch": 4.483628420049, "total_flos": 4278561443027005440, "step": 1114500 }, { "loss": 2.8125, "learning_rate": 1.0403088765507705e-06, "epoch": 4.4840307195931945, "total_flos": 4278952111449968640, "step": 1114600 }, { "loss": 2.7675, "learning_rate": 1.03949775115484e-06, "epoch": 4.48443301913739, "total_flos": 4279335052015472640, "step": 1114700 }, { "loss": 2.745, "learning_rate": 1.0386866257589093e-06, "epoch": 4.484835318681584, "total_flos": 4279727048248995840, "step": 1114800 }, { "loss": 2.79, "learning_rate": 1.0378755003629787e-06, "epoch": 4.485237618225779, "total_flos": 4280105628284620800, "step": 1114900 }, { "loss": 2.7725, "learning_rate": 1.0370643749670481e-06, "epoch": 4.485639917769973, "total_flos": 4280483347899002880, "step": 1115000 }, { "loss": 2.7725, "learning_rate": 1.0362532495711175e-06, "epoch": 4.486042217314168, "total_flos": 4280856733539717120, "step": 1115100 }, { "loss": 2.8225, "learning_rate": 1.035442124175187e-06, "epoch": 4.486444516858362, "total_flos": 4281241363080253440, "step": 1115200 }, { "loss": 2.795, "learning_rate": 1.0346309987792563e-06, "epoch": 4.486846816402557, "total_flos": 4281624388625633280, "step": 1115300 }, { "loss": 2.715, "learning_rate": 1.0338198733833258e-06, "epoch": 4.487249115946752, "total_flos": 4281998783402373120, "step": 1115400 }, { "loss": 2.8, "learning_rate": 1.0330087479873952e-06, "epoch": 4.487651415490946, "total_flos": 4282392171181363200, "step": 1115500 }, { "loss": 2.84, "learning_rate": 1.0321976225914646e-06, "epoch": 4.488053715035141, "total_flos": 4282762364765491200, "step": 1115600 }, { "loss": 2.725, "learning_rate": 1.031386497195534e-06, "epoch": 4.488456014579335, "total_flos": 4283146048904908800, "step": 1115700 }, { "loss": 2.8, "learning_rate": 1.0305753717996034e-06, "epoch": 4.48885831412353, "total_flos": 4283517485319720960, "step": 1115800 }, { "loss": 2.77, "learning_rate": 1.029764246403673e-06, "epoch": 4.489260613667724, "total_flos": 4283906804687155200, "step": 1115900 }, { "loss": 2.7675, "learning_rate": 1.0289531210077424e-06, "epoch": 4.4896629132119195, "total_flos": 4284285081981972480, "step": 1116000 }, { "loss": 2.8025, "learning_rate": 1.0281419956118118e-06, "epoch": 4.490065212756114, "total_flos": 4284662004910018560, "step": 1116100 }, { "loss": 2.8, "learning_rate": 1.0273308702158812e-06, "epoch": 4.490467512300309, "total_flos": 4285050564769812480, "step": 1116200 }, { "loss": 2.725, "learning_rate": 1.0265197448199506e-06, "epoch": 4.490869811844503, "total_flos": 4285427408029224960, "step": 1116300 }, { "loss": 2.745, "learning_rate": 1.02570861942402e-06, "epoch": 4.491272111388698, "total_flos": 4285810125522554880, "step": 1116400 }, { "loss": 2.795, "learning_rate": 1.0248974940280895e-06, "epoch": 4.491674410932893, "total_flos": 4286202429808128000, "step": 1116500 }, { "loss": 2.8325, "learning_rate": 1.0240863686321589e-06, "epoch": 4.492076710477087, "total_flos": 4286568602781880320, "step": 1116600 }, { "loss": 2.755, "learning_rate": 1.0232752432362283e-06, "epoch": 4.492479010021282, "total_flos": 4286953768757882880, "step": 1116700 }, { "loss": 2.7625, "learning_rate": 1.0224641178402977e-06, "epoch": 4.492881309565476, "total_flos": 4287331647709532160, "step": 1116800 }, { "loss": 2.785, "learning_rate": 1.021652992444367e-06, "epoch": 4.493283609109671, "total_flos": 4287714906949570560, "step": 1116900 }, { "loss": 2.8, "learning_rate": 1.0208418670484365e-06, "epoch": 4.493685908653865, "total_flos": 4288097980296130560, "step": 1117000 }, { "loss": 2.7775, "learning_rate": 1.020030741652506e-06, "epoch": 4.49408820819806, "total_flos": 4288469512313303040, "step": 1117100 }, { "loss": 2.7925, "learning_rate": 1.0192196162565753e-06, "epoch": 4.494490507742254, "total_flos": 4288852330720235520, "step": 1117200 }, { "loss": 2.7525, "learning_rate": 1.0184084908606447e-06, "epoch": 4.494892807286449, "total_flos": 4289249234541588480, "step": 1117300 }, { "loss": 2.79, "learning_rate": 1.0175973654647141e-06, "epoch": 4.495295106830644, "total_flos": 4289640327863930880, "step": 1117400 }, { "loss": 2.7725, "learning_rate": 1.0167862400687835e-06, "epoch": 4.4956974063748385, "total_flos": 4290002113751592960, "step": 1117500 }, { "loss": 2.7325, "learning_rate": 1.015975114672853e-06, "epoch": 4.4960997059190335, "total_flos": 4290387816163061760, "step": 1117600 }, { "loss": 2.8275, "learning_rate": 1.0151639892769223e-06, "epoch": 4.496502005463228, "total_flos": 4290781793489940480, "step": 1117700 }, { "loss": 2.8575, "learning_rate": 1.0143528638809918e-06, "epoch": 4.496904305007423, "total_flos": 4291147881483816960, "step": 1117800 }, { "loss": 2.71, "learning_rate": 1.0135417384850612e-06, "epoch": 4.497306604551617, "total_flos": 4291531294749880320, "step": 1117900 }, { "loss": 2.74, "learning_rate": 1.0127306130891306e-06, "epoch": 4.497708904095812, "total_flos": 4291895614100090880, "step": 1118000 }, { "loss": 2.815, "learning_rate": 1.0119194876932e-06, "epoch": 4.498111203640006, "total_flos": 4292290276577218560, "step": 1118100 }, { "loss": 2.72, "learning_rate": 1.0111083622972696e-06, "epoch": 4.498513503184201, "total_flos": 4292678900171919360, "step": 1118200 }, { "loss": 2.7725, "learning_rate": 1.010297236901339e-06, "epoch": 4.498915802728396, "total_flos": 4293035937809018880, "step": 1118300 }, { "loss": 2.8575, "learning_rate": 1.0094861115054084e-06, "epoch": 4.49931810227259, "total_flos": 4293420923202785280, "step": 1118400 }, { "loss": 2.7225, "learning_rate": 1.0086749861094778e-06, "epoch": 4.499720401816785, "total_flos": 4293794978060021760, "step": 1118500 }, { "loss": 2.8025, "learning_rate": 1.0078638607135472e-06, "epoch": 4.500122701360979, "total_flos": 4294167407677132800, "step": 1118600 }, { "loss": 2.835, "learning_rate": 1.0070527353176166e-06, "epoch": 4.500525000905174, "total_flos": 4294551474225991680, "step": 1118700 }, { "loss": 2.76, "learning_rate": 1.006241609921686e-06, "epoch": 4.500927300449368, "total_flos": 4294956987571015680, "step": 1118800 }, { "loss": 2.8525, "learning_rate": 1.0054304845257554e-06, "epoch": 4.501329599993563, "total_flos": 4295327175843901440, "step": 1118900 }, { "loss": 2.785, "learning_rate": 1.0046193591298249e-06, "epoch": 4.5017318995377575, "total_flos": 4295714737190154240, "step": 1119000 }, { "loss": 2.815, "learning_rate": 1.0038082337338943e-06, "epoch": 4.502134199081953, "total_flos": 4296091930991554560, "step": 1119100 }, { "loss": 2.7925, "learning_rate": 1.0029971083379637e-06, "epoch": 4.502536498626147, "total_flos": 4296483135849984000, "step": 1119200 }, { "loss": 2.81, "learning_rate": 1.002185982942033e-06, "epoch": 4.502938798170342, "total_flos": 4296852676151316480, "step": 1119300 }, { "loss": 2.76, "learning_rate": 1.0013748575461025e-06, "epoch": 4.503341097714536, "total_flos": 4297236551495454720, "step": 1119400 }, { "loss": 2.8, "learning_rate": 1.0005637321501719e-06, "epoch": 4.503743397258731, "total_flos": 4297620458707046400, "step": 1119500 }, { "loss": 2.77, "learning_rate": 9.997526067542413e-07, "epoch": 4.504145696802926, "total_flos": 4298008264370442240, "step": 1119600 }, { "loss": 2.7925, "learning_rate": 9.989414813583107e-07, "epoch": 4.50454799634712, "total_flos": 4298400871396823040, "step": 1119700 }, { "loss": 2.8125, "learning_rate": 9.981303559623801e-07, "epoch": 4.504950295891315, "total_flos": 4298794954948546560, "step": 1119800 }, { "loss": 2.785, "learning_rate": 9.973192305664495e-07, "epoch": 4.505352595435509, "total_flos": 4299174167021998080, "step": 1119900 }, { "loss": 2.7675, "learning_rate": 9.96508105170519e-07, "epoch": 4.505754894979704, "total_flos": 4299555864756817920, "step": 1120000 }, { "loss": 2.7525, "learning_rate": 9.956969797745883e-07, "epoch": 4.506157194523898, "total_flos": 4299945901141954560, "step": 1120100 }, { "loss": 2.7875, "learning_rate": 9.948858543786577e-07, "epoch": 4.506559494068093, "total_flos": 4300329521546465280, "step": 1120200 }, { "loss": 2.8125, "learning_rate": 9.940747289827272e-07, "epoch": 4.506961793612287, "total_flos": 4300709928649420800, "step": 1120300 }, { "loss": 2.7875, "learning_rate": 9.932636035867966e-07, "epoch": 4.5073640931564825, "total_flos": 4301103720082821120, "step": 1120400 }, { "loss": 2.785, "learning_rate": 9.92452478190866e-07, "epoch": 4.5077663927006775, "total_flos": 4301500910711255040, "step": 1120500 }, { "loss": 2.805, "learning_rate": 9.916413527949354e-07, "epoch": 4.508168692244872, "total_flos": 4301881424039055360, "step": 1120600 }, { "loss": 2.8, "learning_rate": 9.908302273990048e-07, "epoch": 4.508570991789067, "total_flos": 4302268810114314240, "step": 1120700 }, { "loss": 2.8025, "learning_rate": 9.900191020030744e-07, "epoch": 4.508973291333261, "total_flos": 4302654278831124480, "step": 1120800 }, { "loss": 2.785, "learning_rate": 9.892079766071438e-07, "epoch": 4.509375590877456, "total_flos": 4303048659812413440, "step": 1120900 }, { "loss": 2.89, "learning_rate": 9.883968512112132e-07, "epoch": 4.50977789042165, "total_flos": 4303435971530280960, "step": 1121000 }, { "loss": 2.78, "learning_rate": 9.875857258152826e-07, "epoch": 4.510180189965845, "total_flos": 4303822714945228800, "step": 1121100 }, { "loss": 2.8075, "learning_rate": 9.86774600419352e-07, "epoch": 4.510582489510039, "total_flos": 4304221642349875200, "step": 1121200 }, { "loss": 2.7475, "learning_rate": 9.859634750234214e-07, "epoch": 4.510984789054234, "total_flos": 4304620123610173440, "step": 1121300 }, { "loss": 2.7725, "learning_rate": 9.851523496274908e-07, "epoch": 4.511387088598429, "total_flos": 4305016177632768000, "step": 1121400 }, { "loss": 2.76, "learning_rate": 9.843412242315603e-07, "epoch": 4.511789388142623, "total_flos": 4305407552450949120, "step": 1121500 }, { "loss": 2.785, "learning_rate": 9.835300988356297e-07, "epoch": 4.512191687686818, "total_flos": 4305806134624849920, "step": 1121600 }, { "loss": 2.7875, "learning_rate": 9.82718973439699e-07, "epoch": 4.512593987231012, "total_flos": 4306186701065072640, "step": 1121700 }, { "loss": 2.8525, "learning_rate": 9.819078480437685e-07, "epoch": 4.512996286775207, "total_flos": 4306562328050012160, "step": 1121800 }, { "loss": 2.7675, "learning_rate": 9.810967226478379e-07, "epoch": 4.5133985863194015, "total_flos": 4306940509742469120, "step": 1121900 }, { "loss": 2.7625, "learning_rate": 9.802855972519073e-07, "epoch": 4.5138008858635965, "total_flos": 4307325580116111360, "step": 1122000 }, { "loss": 2.7475, "learning_rate": 9.794744718559767e-07, "epoch": 4.514203185407791, "total_flos": 4307681810444390400, "step": 1122100 }, { "loss": 2.755, "learning_rate": 9.786633464600461e-07, "epoch": 4.514605484951986, "total_flos": 4308068872533872640, "step": 1122200 }, { "loss": 2.845, "learning_rate": 9.778522210641155e-07, "epoch": 4.51500778449618, "total_flos": 4308445386496266240, "step": 1122300 }, { "loss": 2.805, "learning_rate": 9.77041095668185e-07, "epoch": 4.515410084040375, "total_flos": 4308832268003512320, "step": 1122400 }, { "loss": 2.7925, "learning_rate": 9.762299702722543e-07, "epoch": 4.515812383584569, "total_flos": 4309224296104488960, "step": 1122500 }, { "loss": 2.8225, "learning_rate": 9.754188448763237e-07, "epoch": 4.516214683128764, "total_flos": 4309630606135848960, "step": 1122600 }, { "loss": 2.7675, "learning_rate": 9.746077194803931e-07, "epoch": 4.516616982672959, "total_flos": 4310028779344097280, "step": 1122700 }, { "loss": 2.7425, "learning_rate": 9.737965940844626e-07, "epoch": 4.517019282217153, "total_flos": 4310401432033382400, "step": 1122800 }, { "loss": 2.7725, "learning_rate": 9.72985468688532e-07, "epoch": 4.517421581761348, "total_flos": 4310780102360125440, "step": 1122900 }, { "loss": 2.8075, "learning_rate": 9.721743432926014e-07, "epoch": 4.517823881305542, "total_flos": 4311171663071784960, "step": 1123000 }, { "loss": 2.8075, "learning_rate": 9.713632178966708e-07, "epoch": 4.518226180849737, "total_flos": 4311540077389762560, "step": 1123100 }, { "loss": 2.7825, "learning_rate": 9.705520925007402e-07, "epoch": 4.518628480393931, "total_flos": 4311926507441418240, "step": 1123200 }, { "loss": 2.815, "learning_rate": 9.697409671048096e-07, "epoch": 4.519030779938126, "total_flos": 4312310929843507200, "step": 1123300 }, { "loss": 2.8025, "learning_rate": 9.68929841708879e-07, "epoch": 4.5194330794823205, "total_flos": 4312690311876710400, "step": 1123400 }, { "loss": 2.785, "learning_rate": 9.681187163129484e-07, "epoch": 4.5198353790265156, "total_flos": 4313086461501665280, "step": 1123500 }, { "loss": 2.76, "learning_rate": 9.67307590917018e-07, "epoch": 4.520237678570711, "total_flos": 4313466656154931200, "step": 1123600 }, { "loss": 2.75, "learning_rate": 9.664964655210874e-07, "epoch": 4.520639978114905, "total_flos": 4313850876729815040, "step": 1123700 }, { "loss": 2.7225, "learning_rate": 9.656853401251568e-07, "epoch": 4.5210422776591, "total_flos": 4314237301470228480, "step": 1123800 }, { "loss": 2.765, "learning_rate": 9.648742147292263e-07, "epoch": 4.521444577203294, "total_flos": 4314627815867166720, "step": 1123900 }, { "loss": 2.7625, "learning_rate": 9.640630893332957e-07, "epoch": 4.521846876747489, "total_flos": 4315008042387886080, "step": 1124000 }, { "loss": 2.83, "learning_rate": 9.63251963937365e-07, "epoch": 4.522249176291683, "total_flos": 4315385873538355200, "step": 1124100 }, { "loss": 2.8075, "learning_rate": 9.624408385414345e-07, "epoch": 4.522651475835878, "total_flos": 4315760039931678720, "step": 1124200 }, { "loss": 2.8325, "learning_rate": 9.616297131455039e-07, "epoch": 4.523053775380072, "total_flos": 4316157836041728000, "step": 1124300 }, { "loss": 2.79, "learning_rate": 9.608185877495733e-07, "epoch": 4.523456074924267, "total_flos": 4316541700763381760, "step": 1124400 }, { "loss": 2.775, "learning_rate": 9.600074623536427e-07, "epoch": 4.523858374468462, "total_flos": 4316923754351431680, "step": 1124500 }, { "loss": 2.765, "learning_rate": 9.59196336957712e-07, "epoch": 4.524260674012656, "total_flos": 4317318788615516160, "step": 1124600 }, { "loss": 2.755, "learning_rate": 9.583852115617815e-07, "epoch": 4.524662973556851, "total_flos": 4317722443025756160, "step": 1124700 }, { "loss": 2.85, "learning_rate": 9.57574086165851e-07, "epoch": 4.5250652731010454, "total_flos": 4318111730525736960, "step": 1124800 }, { "loss": 2.75, "learning_rate": 9.567629607699203e-07, "epoch": 4.5254675726452405, "total_flos": 4318484574419742720, "step": 1124900 }, { "loss": 2.7525, "learning_rate": 9.559518353739897e-07, "epoch": 4.525869872189435, "total_flos": 4318878955401031680, "step": 1125000 }, { "loss": 2.7375, "learning_rate": 9.551407099780591e-07, "epoch": 4.52627217173363, "total_flos": 4319277861560709120, "step": 1125100 }, { "loss": 2.815, "learning_rate": 9.543295845821286e-07, "epoch": 4.526674471277824, "total_flos": 4319649574160117760, "step": 1125200 }, { "loss": 2.8, "learning_rate": 9.535184591861981e-07, "epoch": 4.527076770822019, "total_flos": 4320050079003709440, "step": 1125300 }, { "loss": 2.7975, "learning_rate": 9.527073337902675e-07, "epoch": 4.527479070366213, "total_flos": 4320440981121331200, "step": 1125400 }, { "loss": 2.795, "learning_rate": 9.518962083943369e-07, "epoch": 4.527881369910408, "total_flos": 4320837385685913600, "step": 1125500 }, { "loss": 2.7875, "learning_rate": 9.510850829984063e-07, "epoch": 4.528283669454602, "total_flos": 4321222243609866240, "step": 1125600 }, { "loss": 2.815, "learning_rate": 9.502739576024757e-07, "epoch": 4.528685968998797, "total_flos": 4321617771819479040, "step": 1125700 }, { "loss": 2.8025, "learning_rate": 9.494628322065451e-07, "epoch": 4.529088268542992, "total_flos": 4322000430889144320, "step": 1125800 }, { "loss": 2.8, "learning_rate": 9.486517068106145e-07, "epoch": 4.529490568087186, "total_flos": 4322385458772848640, "step": 1125900 }, { "loss": 2.7325, "learning_rate": 9.478405814146839e-07, "epoch": 4.529892867631381, "total_flos": 4322800851028439040, "step": 1126000 }, { "loss": 2.7725, "learning_rate": 9.470294560187533e-07, "epoch": 4.530295167175575, "total_flos": 4323190005747363840, "step": 1126100 }, { "loss": 2.81, "learning_rate": 9.462183306228227e-07, "epoch": 4.53069746671977, "total_flos": 4323575548821565440, "step": 1126200 }, { "loss": 2.7975, "learning_rate": 9.454072052268921e-07, "epoch": 4.5310997662639645, "total_flos": 4323950703105945600, "step": 1126300 }, { "loss": 2.7925, "learning_rate": 9.445960798309615e-07, "epoch": 4.5315020658081595, "total_flos": 4324328746706104320, "step": 1126400 }, { "loss": 2.79, "learning_rate": 9.43784954435031e-07, "epoch": 4.531904365352354, "total_flos": 4324697272560168960, "step": 1126500 }, { "loss": 2.7825, "learning_rate": 9.429738290391004e-07, "epoch": 4.532306664896549, "total_flos": 4325091361423134720, "step": 1126600 }, { "loss": 2.795, "learning_rate": 9.421627036431698e-07, "epoch": 4.532708964440744, "total_flos": 4325467482353602560, "step": 1126700 }, { "loss": 2.7725, "learning_rate": 9.413515782472392e-07, "epoch": 4.533111263984938, "total_flos": 4325858049862963200, "step": 1126800 }, { "loss": 2.725, "learning_rate": 9.405404528513086e-07, "epoch": 4.533513563529133, "total_flos": 4326260689825935360, "step": 1126900 }, { "loss": 2.8, "learning_rate": 9.39729327455378e-07, "epoch": 4.533915863073327, "total_flos": 4326638505042677760, "step": 1127000 }, { "loss": 2.7225, "learning_rate": 9.389182020594474e-07, "epoch": 4.534318162617522, "total_flos": 4327013383142461440, "step": 1127100 }, { "loss": 2.8325, "learning_rate": 9.381070766635168e-07, "epoch": 4.534720462161716, "total_flos": 4327390831883489280, "step": 1127200 }, { "loss": 2.7975, "learning_rate": 9.372959512675862e-07, "epoch": 4.535122761705911, "total_flos": 4327776013793218560, "step": 1127300 }, { "loss": 2.745, "learning_rate": 9.364848258716556e-07, "epoch": 4.535525061250105, "total_flos": 4328164780791459840, "step": 1127400 }, { "loss": 2.8125, "learning_rate": 9.35673700475725e-07, "epoch": 4.5359273607943, "total_flos": 4328556022828584960, "step": 1127500 }, { "loss": 2.805, "learning_rate": 9.348625750797944e-07, "epoch": 4.536329660338495, "total_flos": 4328941810219929600, "step": 1127600 }, { "loss": 2.7125, "learning_rate": 9.340514496838641e-07, "epoch": 4.536731959882689, "total_flos": 4329315142748221440, "step": 1127700 }, { "loss": 2.8, "learning_rate": 9.332403242879335e-07, "epoch": 4.537134259426884, "total_flos": 4329692081609994240, "step": 1127800 }, { "loss": 2.8075, "learning_rate": 9.324291988920029e-07, "epoch": 4.5375365589710785, "total_flos": 4330072977347235840, "step": 1127900 }, { "loss": 2.76, "learning_rate": 9.316180734960723e-07, "epoch": 4.537938858515274, "total_flos": 4330469302243184640, "step": 1128000 }, { "loss": 2.7825, "learning_rate": 9.308069481001417e-07, "epoch": 4.538341158059468, "total_flos": 4330863348616212480, "step": 1128100 }, { "loss": 2.8075, "learning_rate": 9.299958227042111e-07, "epoch": 4.538743457603663, "total_flos": 4331239820088668160, "step": 1128200 }, { "loss": 2.805, "learning_rate": 9.291846973082805e-07, "epoch": 4.539145757147857, "total_flos": 4331636506149089280, "step": 1128300 }, { "loss": 2.83, "learning_rate": 9.283735719123499e-07, "epoch": 4.539548056692052, "total_flos": 4332031922822615040, "step": 1128400 }, { "loss": 2.715, "learning_rate": 9.275624465164193e-07, "epoch": 4.539950356236247, "total_flos": 4332399949419909120, "step": 1128500 }, { "loss": 2.7675, "learning_rate": 9.267513211204887e-07, "epoch": 4.540352655780441, "total_flos": 4332795265179832320, "step": 1128600 }, { "loss": 2.795, "learning_rate": 9.259401957245581e-07, "epoch": 4.540754955324635, "total_flos": 4333186576263106560, "step": 1128700 }, { "loss": 2.8125, "learning_rate": 9.251290703286275e-07, "epoch": 4.54115725486883, "total_flos": 4333541983348838400, "step": 1128800 }, { "loss": 2.815, "learning_rate": 9.24317944932697e-07, "epoch": 4.541559554413025, "total_flos": 4333935243658014720, "step": 1128900 }, { "loss": 2.7925, "learning_rate": 9.235068195367664e-07, "epoch": 4.541961853957219, "total_flos": 4334309978354257920, "step": 1129000 }, { "loss": 2.8425, "learning_rate": 9.226956941408358e-07, "epoch": 4.542364153501414, "total_flos": 4334680145382174720, "step": 1129100 }, { "loss": 2.78, "learning_rate": 9.218845687449052e-07, "epoch": 4.542766453045608, "total_flos": 4335072348754145280, "step": 1129200 }, { "loss": 2.765, "learning_rate": 9.210734433489746e-07, "epoch": 4.5431687525898035, "total_flos": 4335443179687342080, "step": 1129300 }, { "loss": 2.7475, "learning_rate": 9.20262317953044e-07, "epoch": 4.543571052133998, "total_flos": 4335815880177807360, "step": 1129400 }, { "loss": 2.7925, "learning_rate": 9.194511925571134e-07, "epoch": 4.543973351678193, "total_flos": 4336203823933501440, "step": 1129500 }, { "loss": 2.755, "learning_rate": 9.186400671611828e-07, "epoch": 4.544375651222387, "total_flos": 4336573879425331200, "step": 1129600 }, { "loss": 2.6975, "learning_rate": 9.178289417652522e-07, "epoch": 4.544777950766582, "total_flos": 4336942771755110400, "step": 1129700 }, { "loss": 2.7925, "learning_rate": 9.170178163693216e-07, "epoch": 4.545180250310777, "total_flos": 4337329674507325440, "step": 1129800 }, { "loss": 2.7475, "learning_rate": 9.16206690973391e-07, "epoch": 4.545582549854971, "total_flos": 4337719657780039680, "step": 1129900 }, { "loss": 2.775, "learning_rate": 9.153955655774604e-07, "epoch": 4.545984849399166, "total_flos": 4338091694365224960, "step": 1130000 }, { "loss": 2.8125, "learning_rate": 9.145844401815301e-07, "epoch": 4.54638714894336, "total_flos": 4338471246358179840, "step": 1130100 }, { "loss": 2.8125, "learning_rate": 9.137733147855995e-07, "epoch": 4.546789448487555, "total_flos": 4338850267226910720, "step": 1130200 }, { "loss": 2.7275, "learning_rate": 9.129621893896689e-07, "epoch": 4.547191748031749, "total_flos": 4339229362453032960, "step": 1130300 }, { "loss": 2.745, "learning_rate": 9.121510639937383e-07, "epoch": 4.547594047575944, "total_flos": 4339609429636485120, "step": 1130400 }, { "loss": 2.7575, "learning_rate": 9.113399385978077e-07, "epoch": 4.547996347120138, "total_flos": 4339991711607951360, "step": 1130500 }, { "loss": 2.7875, "learning_rate": 9.105288132018771e-07, "epoch": 4.548398646664333, "total_flos": 4340389268712099840, "step": 1130600 }, { "loss": 2.7825, "learning_rate": 9.097176878059465e-07, "epoch": 4.548800946208528, "total_flos": 4340780447014318080, "step": 1130700 }, { "loss": 2.7775, "learning_rate": 9.089065624100159e-07, "epoch": 4.5492032457527225, "total_flos": 4341145440892293120, "step": 1130800 }, { "loss": 2.8075, "learning_rate": 9.080954370140853e-07, "epoch": 4.5496055452969175, "total_flos": 4341542180065136640, "step": 1130900 }, { "loss": 2.81, "learning_rate": 9.072843116181547e-07, "epoch": 4.550007844841112, "total_flos": 4341922470320762880, "step": 1131000 }, { "loss": 2.7875, "learning_rate": 9.064731862222241e-07, "epoch": 4.550410144385307, "total_flos": 4342332407930572800, "step": 1131100 }, { "loss": 2.81, "learning_rate": 9.056620608262935e-07, "epoch": 4.550812443929501, "total_flos": 4342722778923970560, "step": 1131200 }, { "loss": 2.76, "learning_rate": 9.048509354303629e-07, "epoch": 4.551214743473696, "total_flos": 4343109633875005440, "step": 1131300 }, { "loss": 2.7825, "learning_rate": 9.040398100344324e-07, "epoch": 4.55161704301789, "total_flos": 4343477926034411520, "step": 1131400 }, { "loss": 2.7825, "learning_rate": 9.032286846385018e-07, "epoch": 4.552019342562085, "total_flos": 4343870575550730240, "step": 1131500 }, { "loss": 2.835, "learning_rate": 9.024175592425712e-07, "epoch": 4.55242164210628, "total_flos": 4344259985209282560, "step": 1131600 }, { "loss": 2.75, "learning_rate": 9.016064338466406e-07, "epoch": 4.552823941650474, "total_flos": 4344634969533911040, "step": 1131700 }, { "loss": 2.77, "learning_rate": 9.0079530845071e-07, "epoch": 4.553226241194668, "total_flos": 4345018844878049280, "step": 1131800 }, { "loss": 2.7975, "learning_rate": 8.999841830547794e-07, "epoch": 4.553628540738863, "total_flos": 4345417448296919040, "step": 1131900 }, { "loss": 2.7675, "learning_rate": 8.991730576588488e-07, "epoch": 4.554030840283058, "total_flos": 4345809359550566400, "step": 1132000 }, { "loss": 2.83, "learning_rate": 8.983619322629182e-07, "epoch": 4.554433139827252, "total_flos": 4346203326254960640, "step": 1132100 }, { "loss": 2.7575, "learning_rate": 8.975508068669876e-07, "epoch": 4.554835439371447, "total_flos": 4346594562980843520, "step": 1132200 }, { "loss": 2.7575, "learning_rate": 8.96739681471057e-07, "epoch": 4.5552377389156415, "total_flos": 4346962292148572160, "step": 1132300 }, { "loss": 2.745, "learning_rate": 8.959285560751265e-07, "epoch": 4.555640038459837, "total_flos": 4347332926565806080, "step": 1132400 }, { "loss": 2.815, "learning_rate": 8.951174306791959e-07, "epoch": 4.556042338004031, "total_flos": 4347716238918266880, "step": 1132500 }, { "loss": 2.7775, "learning_rate": 8.943063052832653e-07, "epoch": 4.556444637548226, "total_flos": 4348105186498744320, "step": 1132600 }, { "loss": 2.7675, "learning_rate": 8.934951798873348e-07, "epoch": 4.55684693709242, "total_flos": 4348490209071206400, "step": 1132700 }, { "loss": 2.68, "learning_rate": 8.926840544914042e-07, "epoch": 4.557249236636615, "total_flos": 4348875486583296000, "step": 1132800 }, { "loss": 2.8075, "learning_rate": 8.918729290954737e-07, "epoch": 4.55765153618081, "total_flos": 4349265209605140480, "step": 1132900 }, { "loss": 2.81, "learning_rate": 8.910618036995431e-07, "epoch": 4.558053835725004, "total_flos": 4349647480954122240, "step": 1133000 }, { "loss": 2.795, "learning_rate": 8.902506783036125e-07, "epoch": 4.558456135269199, "total_flos": 4350040077358018560, "step": 1133100 }, { "loss": 2.775, "learning_rate": 8.894395529076819e-07, "epoch": 4.558858434813393, "total_flos": 4350424266065448960, "step": 1133200 }, { "loss": 2.8325, "learning_rate": 8.886284275117513e-07, "epoch": 4.559260734357588, "total_flos": 4350813962531082240, "step": 1133300 }, { "loss": 2.705, "learning_rate": 8.878173021158207e-07, "epoch": 4.559663033901782, "total_flos": 4351195060095528960, "step": 1133400 }, { "loss": 2.8075, "learning_rate": 8.870061767198901e-07, "epoch": 4.560065333445977, "total_flos": 4351564680065495040, "step": 1133500 }, { "loss": 2.7875, "learning_rate": 8.861950513239595e-07, "epoch": 4.560467632990171, "total_flos": 4351953160256655360, "step": 1133600 }, { "loss": 2.78, "learning_rate": 8.853839259280289e-07, "epoch": 4.5608699325343665, "total_flos": 4352336982488371200, "step": 1133700 }, { "loss": 2.735, "learning_rate": 8.845728005320983e-07, "epoch": 4.5612722320785615, "total_flos": 4352720454178099200, "step": 1133800 }, { "loss": 2.8225, "learning_rate": 8.837616751361678e-07, "epoch": 4.561674531622756, "total_flos": 4353121203338833920, "step": 1133900 }, { "loss": 2.81, "learning_rate": 8.829505497402372e-07, "epoch": 4.562076831166951, "total_flos": 4353520215723356160, "step": 1134000 }, { "loss": 2.77, "learning_rate": 8.821394243443066e-07, "epoch": 4.562479130711145, "total_flos": 4353900341330472960, "step": 1134100 }, { "loss": 2.7575, "learning_rate": 8.81328298948376e-07, "epoch": 4.56288143025534, "total_flos": 4354288763097968640, "step": 1134200 }, { "loss": 2.835, "learning_rate": 8.805171735524454e-07, "epoch": 4.563283729799534, "total_flos": 4354673482929623040, "step": 1134300 }, { "loss": 2.71, "learning_rate": 8.797060481565148e-07, "epoch": 4.563686029343729, "total_flos": 4355071491489361920, "step": 1134400 }, { "loss": 2.7925, "learning_rate": 8.788949227605842e-07, "epoch": 4.564088328887923, "total_flos": 4355460178818969600, "step": 1134500 }, { "loss": 2.78, "learning_rate": 8.780837973646536e-07, "epoch": 4.564490628432118, "total_flos": 4355849816860938240, "step": 1134600 }, { "loss": 2.7275, "learning_rate": 8.77272671968723e-07, "epoch": 4.564892927976313, "total_flos": 4356232231613460480, "step": 1134700 }, { "loss": 2.8, "learning_rate": 8.764615465727925e-07, "epoch": 4.565295227520507, "total_flos": 4356621423511080960, "step": 1134800 }, { "loss": 2.8275, "learning_rate": 8.756504211768619e-07, "epoch": 4.565697527064702, "total_flos": 4357008320952053760, "step": 1134900 }, { "loss": 2.785, "learning_rate": 8.748392957809313e-07, "epoch": 4.566099826608896, "total_flos": 4357409595925770240, "step": 1135000 }, { "loss": 2.8025, "learning_rate": 8.740281703850008e-07, "epoch": 4.566502126153091, "total_flos": 4357796461499289600, "step": 1135100 }, { "loss": 2.8025, "learning_rate": 8.732170449890702e-07, "epoch": 4.5669044256972855, "total_flos": 4358164711168757760, "step": 1135200 }, { "loss": 2.8175, "learning_rate": 8.724059195931396e-07, "epoch": 4.5673067252414805, "total_flos": 4358546095540285440, "step": 1135300 }, { "loss": 2.8525, "learning_rate": 8.71594794197209e-07, "epoch": 4.567709024785675, "total_flos": 4358932074136350720, "step": 1135400 }, { "loss": 2.7675, "learning_rate": 8.707836688012784e-07, "epoch": 4.56811132432987, "total_flos": 4359305024255201280, "step": 1135500 }, { "loss": 2.8175, "learning_rate": 8.699725434053478e-07, "epoch": 4.568513623874064, "total_flos": 4359695767035555840, "step": 1135600 }, { "loss": 2.7775, "learning_rate": 8.691614180094173e-07, "epoch": 4.568915923418259, "total_flos": 4360073693788385280, "step": 1135700 }, { "loss": 2.7825, "learning_rate": 8.683502926134867e-07, "epoch": 4.569318222962453, "total_flos": 4360440769673318400, "step": 1135800 }, { "loss": 2.8175, "learning_rate": 8.675391672175561e-07, "epoch": 4.569720522506648, "total_flos": 4360820682830745600, "step": 1135900 }, { "loss": 2.7275, "learning_rate": 8.667280418216255e-07, "epoch": 4.570122822050843, "total_flos": 4361211600882094080, "step": 1136000 }, { "loss": 2.8625, "learning_rate": 8.659169164256949e-07, "epoch": 4.570525121595037, "total_flos": 4361606183690588160, "step": 1136100 }, { "loss": 2.7575, "learning_rate": 8.651057910297643e-07, "epoch": 4.570927421139232, "total_flos": 4361992337557647360, "step": 1136200 }, { "loss": 2.7075, "learning_rate": 8.642946656338337e-07, "epoch": 4.571329720683426, "total_flos": 4362388471248875520, "step": 1136300 }, { "loss": 2.7575, "learning_rate": 8.634835402379032e-07, "epoch": 4.571732020227621, "total_flos": 4362776043217612800, "step": 1136400 }, { "loss": 2.755, "learning_rate": 8.626724148419726e-07, "epoch": 4.572134319771815, "total_flos": 4363164449051381760, "step": 1136500 }, { "loss": 2.74, "learning_rate": 8.61861289446042e-07, "epoch": 4.57253661931601, "total_flos": 4363555765445898240, "step": 1136600 }, { "loss": 2.7975, "learning_rate": 8.610501640501114e-07, "epoch": 4.5729389188602045, "total_flos": 4363937808411463680, "step": 1136700 }, { "loss": 2.825, "learning_rate": 8.602390386541808e-07, "epoch": 4.5733412184043996, "total_flos": 4364336050665861120, "step": 1136800 }, { "loss": 2.765, "learning_rate": 8.594279132582502e-07, "epoch": 4.573743517948595, "total_flos": 4364721678719938560, "step": 1136900 }, { "loss": 2.805, "learning_rate": 8.586167878623196e-07, "epoch": 4.574145817492789, "total_flos": 4365115278948618240, "step": 1137000 }, { "loss": 2.7775, "learning_rate": 8.57805662466389e-07, "epoch": 4.574548117036984, "total_flos": 4365492005360701440, "step": 1137100 }, { "loss": 2.8075, "learning_rate": 8.569945370704585e-07, "epoch": 4.574950416581178, "total_flos": 4365874935303720960, "step": 1137200 }, { "loss": 2.775, "learning_rate": 8.561834116745279e-07, "epoch": 4.575352716125373, "total_flos": 4366265991447367680, "step": 1137300 }, { "loss": 2.765, "learning_rate": 8.553722862785973e-07, "epoch": 4.575755015669567, "total_flos": 4366648337153740800, "step": 1137400 }, { "loss": 2.7775, "learning_rate": 8.545611608826667e-07, "epoch": 4.576157315213762, "total_flos": 4367054068259696640, "step": 1137500 }, { "loss": 2.7925, "learning_rate": 8.537500354867362e-07, "epoch": 4.576559614757956, "total_flos": 4367443727546634240, "step": 1137600 }, { "loss": 2.8175, "learning_rate": 8.529389100908056e-07, "epoch": 4.576961914302151, "total_flos": 4367811870991257600, "step": 1137700 }, { "loss": 2.8, "learning_rate": 8.52127784694875e-07, "epoch": 4.577364213846346, "total_flos": 4368198311665397760, "step": 1137800 }, { "loss": 2.8025, "learning_rate": 8.513166592989444e-07, "epoch": 4.57776651339054, "total_flos": 4368588294938112000, "step": 1137900 }, { "loss": 2.7525, "learning_rate": 8.505055339030138e-07, "epoch": 4.578168812934735, "total_flos": 4368983934683811840, "step": 1138000 }, { "loss": 2.765, "learning_rate": 8.496944085070832e-07, "epoch": 4.5785711124789295, "total_flos": 4369378501558579200, "step": 1138100 }, { "loss": 2.795, "learning_rate": 8.488832831111526e-07, "epoch": 4.5789734120231245, "total_flos": 4369772431084277760, "step": 1138200 }, { "loss": 2.8325, "learning_rate": 8.48072157715222e-07, "epoch": 4.579375711567319, "total_flos": 4370144292398469120, "step": 1138300 }, { "loss": 2.7925, "learning_rate": 8.472610323192914e-07, "epoch": 4.579778011111514, "total_flos": 4370525639591301120, "step": 1138400 }, { "loss": 2.79, "learning_rate": 8.464499069233609e-07, "epoch": 4.580180310655708, "total_flos": 4370912324582584320, "step": 1138500 }, { "loss": 2.7925, "learning_rate": 8.456387815274303e-07, "epoch": 4.580582610199903, "total_flos": 4371277902697205760, "step": 1138600 }, { "loss": 2.765, "learning_rate": 8.448276561314997e-07, "epoch": 4.580984909744097, "total_flos": 4371661873643704320, "step": 1138700 }, { "loss": 2.7375, "learning_rate": 8.440165307355692e-07, "epoch": 4.581387209288292, "total_flos": 4372045016036413440, "step": 1138800 }, { "loss": 2.7375, "learning_rate": 8.432054053396386e-07, "epoch": 4.581789508832486, "total_flos": 4372430373217136640, "step": 1138900 }, { "loss": 2.82, "learning_rate": 8.42394279943708e-07, "epoch": 4.582191808376681, "total_flos": 4372805341608038400, "step": 1139000 }, { "loss": 2.7775, "learning_rate": 8.415831545477774e-07, "epoch": 4.582594107920876, "total_flos": 4373190337624289280, "step": 1139100 }, { "loss": 2.7625, "learning_rate": 8.407720291518468e-07, "epoch": 4.58299640746507, "total_flos": 4373580942312345600, "step": 1139200 }, { "loss": 2.7575, "learning_rate": 8.399609037559162e-07, "epoch": 4.583398707009265, "total_flos": 4373952245946101760, "step": 1139300 }, { "loss": 2.7675, "learning_rate": 8.391497783599856e-07, "epoch": 4.583801006553459, "total_flos": 4374332605247877120, "step": 1139400 }, { "loss": 2.7975, "learning_rate": 8.38338652964055e-07, "epoch": 4.584203306097654, "total_flos": 4374712486537850880, "step": 1139500 }, { "loss": 2.775, "learning_rate": 8.375275275681245e-07, "epoch": 4.5846056056418485, "total_flos": 4375099341488885760, "step": 1139600 }, { "loss": 2.785, "learning_rate": 8.367164021721939e-07, "epoch": 4.5850079051860435, "total_flos": 4375486010546442240, "step": 1139700 }, { "loss": 2.775, "learning_rate": 8.359052767762633e-07, "epoch": 4.585410204730238, "total_flos": 4375887577638481920, "step": 1139800 }, { "loss": 2.8075, "learning_rate": 8.350941513803327e-07, "epoch": 4.585812504274433, "total_flos": 4376267687311872000, "step": 1139900 }, { "loss": 2.815, "learning_rate": 8.342830259844021e-07, "epoch": 4.586214803818628, "total_flos": 4376650420738928640, "step": 1140000 }, { "loss": 2.8, "learning_rate": 8.334719005884716e-07, "epoch": 4.586617103362822, "total_flos": 4377024656178401280, "step": 1140100 }, { "loss": 2.785, "learning_rate": 8.32660775192541e-07, "epoch": 4.587019402907017, "total_flos": 4377409742485770240, "step": 1140200 }, { "loss": 2.8025, "learning_rate": 8.318496497966104e-07, "epoch": 4.587421702451211, "total_flos": 4377781311681638400, "step": 1140300 }, { "loss": 2.795, "learning_rate": 8.310385244006798e-07, "epoch": 4.587824001995406, "total_flos": 4378168336592424960, "step": 1140400 }, { "loss": 2.865, "learning_rate": 8.302273990047492e-07, "epoch": 4.5882263015396, "total_flos": 4378559121862717440, "step": 1140500 }, { "loss": 2.79, "learning_rate": 8.294162736088186e-07, "epoch": 4.588628601083795, "total_flos": 4378952557442887680, "step": 1140600 }, { "loss": 2.8175, "learning_rate": 8.28605148212888e-07, "epoch": 4.589030900627989, "total_flos": 4379349418774302720, "step": 1140700 }, { "loss": 2.84, "learning_rate": 8.277940228169574e-07, "epoch": 4.589433200172184, "total_flos": 4379728641470238720, "step": 1140800 }, { "loss": 2.7825, "learning_rate": 8.269828974210268e-07, "epoch": 4.589835499716379, "total_flos": 4380107890722385920, "step": 1140900 }, { "loss": 2.82, "learning_rate": 8.261717720250962e-07, "epoch": 4.590237799260573, "total_flos": 4380490656016896000, "step": 1141000 }, { "loss": 2.78, "learning_rate": 8.253606466291656e-07, "epoch": 4.590640098804768, "total_flos": 4380865348223201280, "step": 1141100 }, { "loss": 2.7825, "learning_rate": 8.24549521233235e-07, "epoch": 4.5910423983489625, "total_flos": 4381251124992061440, "step": 1141200 }, { "loss": 2.77, "learning_rate": 8.237383958373044e-07, "epoch": 4.591444697893158, "total_flos": 4381636636198809600, "step": 1141300 }, { "loss": 2.81, "learning_rate": 8.22927270441374e-07, "epoch": 4.591846997437352, "total_flos": 4382022131471831040, "step": 1141400 }, { "loss": 2.7625, "learning_rate": 8.221161450454434e-07, "epoch": 4.592249296981547, "total_flos": 4382418328897966080, "step": 1141500 }, { "loss": 2.7775, "learning_rate": 8.213050196495128e-07, "epoch": 4.592651596525741, "total_flos": 4382810144549253120, "step": 1141600 }, { "loss": 2.785, "learning_rate": 8.204938942535822e-07, "epoch": 4.593053896069936, "total_flos": 4383200589900042240, "step": 1141700 }, { "loss": 2.7875, "learning_rate": 8.196827688576516e-07, "epoch": 4.59345619561413, "total_flos": 4383607786908856320, "step": 1141800 }, { "loss": 2.77, "learning_rate": 8.188716434617211e-07, "epoch": 4.593858495158325, "total_flos": 4383993558366474240, "step": 1141900 }, { "loss": 2.795, "learning_rate": 8.180605180657905e-07, "epoch": 4.594260794702519, "total_flos": 4384387668474408960, "step": 1142000 }, { "loss": 2.8075, "learning_rate": 8.172493926698599e-07, "epoch": 4.594663094246714, "total_flos": 4384766051994071040, "step": 1142100 }, { "loss": 2.7825, "learning_rate": 8.164382672739293e-07, "epoch": 4.595065393790909, "total_flos": 4385162568094740480, "step": 1142200 }, { "loss": 2.78, "learning_rate": 8.156271418779987e-07, "epoch": 4.595467693335103, "total_flos": 4385558797388328960, "step": 1142300 }, { "loss": 2.77, "learning_rate": 8.148160164820681e-07, "epoch": 4.595869992879298, "total_flos": 4385945477068369920, "step": 1142400 }, { "loss": 2.7725, "learning_rate": 8.140048910861376e-07, "epoch": 4.5962722924234924, "total_flos": 4386331869941329920, "step": 1142500 }, { "loss": 2.7375, "learning_rate": 8.13193765690207e-07, "epoch": 4.5966745919676875, "total_flos": 4386721125573857280, "step": 1142600 }, { "loss": 2.79, "learning_rate": 8.123826402942764e-07, "epoch": 4.597076891511882, "total_flos": 4387103519081410560, "step": 1142700 }, { "loss": 2.7375, "learning_rate": 8.115715148983458e-07, "epoch": 4.597479191056077, "total_flos": 4387488817838469120, "step": 1142800 }, { "loss": 2.76, "learning_rate": 8.107603895024152e-07, "epoch": 4.597881490600271, "total_flos": 4387874913281863680, "step": 1142900 }, { "loss": 2.7875, "learning_rate": 8.099492641064846e-07, "epoch": 4.598283790144466, "total_flos": 4388244453583196160, "step": 1143000 }, { "loss": 2.78, "learning_rate": 8.09138138710554e-07, "epoch": 4.598686089688661, "total_flos": 4388616309586145280, "step": 1143100 }, { "loss": 2.7975, "learning_rate": 8.083270133146234e-07, "epoch": 4.599088389232855, "total_flos": 4388996663576678400, "step": 1143200 }, { "loss": 2.825, "learning_rate": 8.075158879186928e-07, "epoch": 4.59949068877705, "total_flos": 4389377830187274240, "step": 1143300 }, { "loss": 2.785, "learning_rate": 8.067047625227622e-07, "epoch": 4.599892988321244, "total_flos": 4389750573167677440, "step": 1143400 }, { "loss": 2.745, "learning_rate": 8.058936371268316e-07, "epoch": 4.600295287865439, "total_flos": 4390136137486848000, "step": 1143500 }, { "loss": 2.7625, "learning_rate": 8.05082511730901e-07, "epoch": 4.600697587409633, "total_flos": 4390506166422466560, "step": 1143600 }, { "loss": 2.7925, "learning_rate": 8.042713863349704e-07, "epoch": 4.601099886953828, "total_flos": 4390887864157286400, "step": 1143700 }, { "loss": 2.835, "learning_rate": 8.034602609390398e-07, "epoch": 4.601502186498022, "total_flos": 4391257409769861120, "step": 1143800 }, { "loss": 2.75, "learning_rate": 8.026491355431093e-07, "epoch": 4.601904486042217, "total_flos": 4391626705754050560, "step": 1143900 }, { "loss": 2.7625, "learning_rate": 8.018380101471787e-07, "epoch": 4.602306785586412, "total_flos": 4392006199323340800, "step": 1144000 }, { "loss": 2.75, "learning_rate": 8.010268847512481e-07, "epoch": 4.6027090851306065, "total_flos": 4392390998823628800, "step": 1144100 }, { "loss": 2.7825, "learning_rate": 8.002157593553176e-07, "epoch": 4.6031113846748015, "total_flos": 4392783882034606080, "step": 1144200 }, { "loss": 2.8625, "learning_rate": 7.994046339593871e-07, "epoch": 4.603513684218996, "total_flos": 4393171130017566720, "step": 1144300 }, { "loss": 2.7675, "learning_rate": 7.985935085634565e-07, "epoch": 4.603915983763191, "total_flos": 4393549561338408960, "step": 1144400 }, { "loss": 2.785, "learning_rate": 7.977823831675259e-07, "epoch": 4.604318283307385, "total_flos": 4393935545245716480, "step": 1144500 }, { "loss": 2.7725, "learning_rate": 7.969712577715953e-07, "epoch": 4.60472058285158, "total_flos": 4394308325404815360, "step": 1144600 }, { "loss": 2.8, "learning_rate": 7.961601323756647e-07, "epoch": 4.605122882395774, "total_flos": 4394695238779514880, "step": 1144700 }, { "loss": 2.8625, "learning_rate": 7.953490069797341e-07, "epoch": 4.605525181939969, "total_flos": 4395076123894272000, "step": 1144800 }, { "loss": 2.82, "learning_rate": 7.945378815838035e-07, "epoch": 4.605927481484164, "total_flos": 4395456796559339520, "step": 1144900 }, { "loss": 2.75, "learning_rate": 7.93726756187873e-07, "epoch": 4.606329781028358, "total_flos": 4395829348335022080, "step": 1145000 }, { "loss": 2.7525, "learning_rate": 7.929156307919424e-07, "epoch": 4.606732080572552, "total_flos": 4396196551689768960, "step": 1145100 }, { "loss": 2.7575, "learning_rate": 7.921045053960118e-07, "epoch": 4.607134380116747, "total_flos": 4396571052691353600, "step": 1145200 }, { "loss": 2.7975, "learning_rate": 7.912933800000812e-07, "epoch": 4.607536679660942, "total_flos": 4396953116901888000, "step": 1145300 }, { "loss": 2.72, "learning_rate": 7.904822546041506e-07, "epoch": 4.607938979205136, "total_flos": 4397330608132853760, "step": 1145400 }, { "loss": 2.77, "learning_rate": 7.8967112920822e-07, "epoch": 4.608341278749331, "total_flos": 4397720193062400000, "step": 1145500 }, { "loss": 2.775, "learning_rate": 7.888600038122894e-07, "epoch": 4.6087435782935255, "total_flos": 4398104663265669120, "step": 1145600 }, { "loss": 2.7425, "learning_rate": 7.880488784163588e-07, "epoch": 4.609145877837721, "total_flos": 4398492463617822720, "step": 1145700 }, { "loss": 2.8475, "learning_rate": 7.872377530204282e-07, "epoch": 4.609548177381915, "total_flos": 4398864170905989120, "step": 1145800 }, { "loss": 2.76, "learning_rate": 7.864266276244976e-07, "epoch": 4.60995047692611, "total_flos": 4399245008219566080, "step": 1145900 }, { "loss": 2.8275, "learning_rate": 7.85615502228567e-07, "epoch": 4.610352776470304, "total_flos": 4399638305707438080, "step": 1146000 }, { "loss": 2.74, "learning_rate": 7.848043768326364e-07, "epoch": 4.610755076014499, "total_flos": 4400010496318648320, "step": 1146100 }, { "loss": 2.8, "learning_rate": 7.839932514367058e-07, "epoch": 4.611157375558694, "total_flos": 4400391801021542400, "step": 1146200 }, { "loss": 2.7775, "learning_rate": 7.831821260407753e-07, "epoch": 4.611559675102888, "total_flos": 4400779096805683200, "step": 1146300 }, { "loss": 2.8075, "learning_rate": 7.823710006448447e-07, "epoch": 4.611961974647083, "total_flos": 4401154665366958080, "step": 1146400 }, { "loss": 2.7875, "learning_rate": 7.815598752489141e-07, "epoch": 4.612364274191277, "total_flos": 4401518666042634240, "step": 1146500 }, { "loss": 2.7275, "learning_rate": 7.807487498529835e-07, "epoch": 4.612766573735472, "total_flos": 4401900825855528960, "step": 1146600 }, { "loss": 2.7475, "learning_rate": 7.799376244570531e-07, "epoch": 4.613168873279666, "total_flos": 4402293220432220160, "step": 1146700 }, { "loss": 2.78, "learning_rate": 7.791264990611225e-07, "epoch": 4.613571172823861, "total_flos": 4402676150375239680, "step": 1146800 }, { "loss": 2.8325, "learning_rate": 7.783153736651919e-07, "epoch": 4.613973472368055, "total_flos": 4403067992582737920, "step": 1146900 }, { "loss": 2.785, "learning_rate": 7.775042482692613e-07, "epoch": 4.6143757719122505, "total_flos": 4403454603216629760, "step": 1147000 }, { "loss": 2.7025, "learning_rate": 7.766931228733307e-07, "epoch": 4.6147780714564455, "total_flos": 4403848288425185280, "step": 1147100 }, { "loss": 2.74, "learning_rate": 7.758819974774001e-07, "epoch": 4.61518037100064, "total_flos": 4404239270211440640, "step": 1147200 }, { "loss": 2.7875, "learning_rate": 7.750708720814695e-07, "epoch": 4.615582670544835, "total_flos": 4404621015747440640, "step": 1147300 }, { "loss": 2.7775, "learning_rate": 7.742597466855389e-07, "epoch": 4.615984970089029, "total_flos": 4405002729415987200, "step": 1147400 }, { "loss": 2.7525, "learning_rate": 7.734486212896084e-07, "epoch": 4.616387269633224, "total_flos": 4405382350455091200, "step": 1147500 }, { "loss": 2.78, "learning_rate": 7.726374958936778e-07, "epoch": 4.616789569177418, "total_flos": 4405763288682270720, "step": 1147600 }, { "loss": 2.7775, "learning_rate": 7.718263704977472e-07, "epoch": 4.617191868721613, "total_flos": 4406141741248081920, "step": 1147700 }, { "loss": 2.7825, "learning_rate": 7.710152451018166e-07, "epoch": 4.617594168265807, "total_flos": 4406549819923107840, "step": 1147800 }, { "loss": 2.81, "learning_rate": 7.70204119705886e-07, "epoch": 4.617996467810002, "total_flos": 4406944158414458880, "step": 1147900 }, { "loss": 2.7675, "learning_rate": 7.693929943099554e-07, "epoch": 4.618398767354197, "total_flos": 4407329271278039040, "step": 1148000 }, { "loss": 2.785, "learning_rate": 7.685818689140248e-07, "epoch": 4.618801066898391, "total_flos": 4407708913562112000, "step": 1148100 }, { "loss": 2.765, "learning_rate": 7.677707435180942e-07, "epoch": 4.619203366442585, "total_flos": 4408091121176186880, "step": 1148200 }, { "loss": 2.79, "learning_rate": 7.669596181221636e-07, "epoch": 4.61960566598678, "total_flos": 4408478262934302720, "step": 1148300 }, { "loss": 2.775, "learning_rate": 7.66148492726233e-07, "epoch": 4.620007965530975, "total_flos": 4408864708919685120, "step": 1148400 }, { "loss": 2.7775, "learning_rate": 7.653373673303024e-07, "epoch": 4.6204102650751695, "total_flos": 4409253343136870400, "step": 1148500 }, { "loss": 2.7725, "learning_rate": 7.645262419343718e-07, "epoch": 4.6208125646193645, "total_flos": 4409631392048271360, "step": 1148600 }, { "loss": 2.8225, "learning_rate": 7.637151165384412e-07, "epoch": 4.621214864163559, "total_flos": 4410029326250618880, "step": 1148700 }, { "loss": 2.785, "learning_rate": 7.629039911425107e-07, "epoch": 4.621617163707754, "total_flos": 4410419957494886400, "step": 1148800 }, { "loss": 2.785, "learning_rate": 7.620928657465801e-07, "epoch": 4.622019463251948, "total_flos": 4410794453185228800, "step": 1148900 }, { "loss": 2.7675, "learning_rate": 7.612817403506497e-07, "epoch": 4.622421762796143, "total_flos": 4411158384814755840, "step": 1149000 }, { "loss": 2.7775, "learning_rate": 7.604706149547191e-07, "epoch": 4.622824062340337, "total_flos": 4411553721819648000, "step": 1149100 }, { "loss": 2.75, "learning_rate": 7.596594895587885e-07, "epoch": 4.623226361884532, "total_flos": 4411921031399239680, "step": 1149200 }, { "loss": 2.73, "learning_rate": 7.588483641628579e-07, "epoch": 4.623628661428727, "total_flos": 4412304811141017600, "step": 1149300 }, { "loss": 2.8225, "learning_rate": 7.580372387669273e-07, "epoch": 4.624030960972921, "total_flos": 4412689823090995200, "step": 1149400 }, { "loss": 2.7825, "learning_rate": 7.572261133709967e-07, "epoch": 4.624433260517116, "total_flos": 4413079094657249280, "step": 1149500 }, { "loss": 2.805, "learning_rate": 7.564149879750661e-07, "epoch": 4.62483556006131, "total_flos": 4413488177157058560, "step": 1149600 }, { "loss": 2.735, "learning_rate": 7.556038625791355e-07, "epoch": 4.625237859605505, "total_flos": 4413870549419642880, "step": 1149700 }, { "loss": 2.8425, "learning_rate": 7.547927371832049e-07, "epoch": 4.625640159149699, "total_flos": 4414260261819002880, "step": 1149800 }, { "loss": 2.805, "learning_rate": 7.539816117872743e-07, "epoch": 4.626042458693894, "total_flos": 4414635915360153600, "step": 1149900 }, { "loss": 2.8125, "learning_rate": 7.531704863913438e-07, "epoch": 4.6264447582380885, "total_flos": 4415008753942917120, "step": 1150000 }, { "loss": 2.805, "learning_rate": 7.523593609954132e-07, "epoch": 4.626847057782284, "total_flos": 4415401865537310720, "step": 1150100 }, { "loss": 2.7275, "learning_rate": 7.515482355994826e-07, "epoch": 4.627249357326479, "total_flos": 4415779808223866880, "step": 1150200 }, { "loss": 2.7775, "learning_rate": 7.50737110203552e-07, "epoch": 4.627651656870673, "total_flos": 4416161335998935040, "step": 1150300 }, { "loss": 2.7875, "learning_rate": 7.499259848076214e-07, "epoch": 4.628053956414868, "total_flos": 4416548945146368000, "step": 1150400 }, { "loss": 2.7325, "learning_rate": 7.491148594116908e-07, "epoch": 4.628456255959062, "total_flos": 4416930159558144000, "step": 1150500 }, { "loss": 2.86, "learning_rate": 7.483037340157602e-07, "epoch": 4.628858555503257, "total_flos": 4417313254149672960, "step": 1150600 }, { "loss": 2.8425, "learning_rate": 7.474926086198296e-07, "epoch": 4.629260855047451, "total_flos": 4417695552054865920, "step": 1150700 }, { "loss": 2.7525, "learning_rate": 7.46681483223899e-07, "epoch": 4.629663154591646, "total_flos": 4418089566560440320, "step": 1150800 }, { "loss": 2.7725, "learning_rate": 7.458703578279684e-07, "epoch": 4.63006545413584, "total_flos": 4418469973663395840, "step": 1150900 }, { "loss": 2.7375, "learning_rate": 7.450592324320378e-07, "epoch": 4.630467753680035, "total_flos": 4418860249054433280, "step": 1151000 }, { "loss": 2.7875, "learning_rate": 7.442481070361072e-07, "epoch": 4.63087005322423, "total_flos": 4419251448601620480, "step": 1151100 }, { "loss": 2.775, "learning_rate": 7.434369816401766e-07, "epoch": 4.631272352768424, "total_flos": 4419637544045015040, "step": 1151200 }, { "loss": 2.8275, "learning_rate": 7.426258562442461e-07, "epoch": 4.631674652312618, "total_flos": 4420025015100149760, "step": 1151300 }, { "loss": 2.7775, "learning_rate": 7.418147308483157e-07, "epoch": 4.6320769518568135, "total_flos": 4420403372063600640, "step": 1151400 }, { "loss": 2.77, "learning_rate": 7.410036054523851e-07, "epoch": 4.6324792514010085, "total_flos": 4420789361282150400, "step": 1151500 }, { "loss": 2.7825, "learning_rate": 7.401924800564545e-07, "epoch": 4.632881550945203, "total_flos": 4421186684691640320, "step": 1151600 }, { "loss": 2.835, "learning_rate": 7.393813546605239e-07, "epoch": 4.633283850489398, "total_flos": 4421578951798517760, "step": 1151700 }, { "loss": 2.7525, "learning_rate": 7.385702292645933e-07, "epoch": 4.633686150033592, "total_flos": 4421960585798430720, "step": 1151800 }, { "loss": 2.8375, "learning_rate": 7.377591038686627e-07, "epoch": 4.634088449577787, "total_flos": 4422348062164807680, "step": 1151900 }, { "loss": 2.8525, "learning_rate": 7.369479784727321e-07, "epoch": 4.634490749121981, "total_flos": 4422740398317834240, "step": 1152000 }, { "loss": 2.7725, "learning_rate": 7.361368530768015e-07, "epoch": 4.634893048666176, "total_flos": 4423138879578132480, "step": 1152100 }, { "loss": 2.7575, "learning_rate": 7.353257276808709e-07, "epoch": 4.63529534821037, "total_flos": 4423522462803947520, "step": 1152200 }, { "loss": 2.785, "learning_rate": 7.345146022849403e-07, "epoch": 4.635697647754565, "total_flos": 4423918777077411840, "step": 1152300 }, { "loss": 2.755, "learning_rate": 7.337034768890098e-07, "epoch": 4.63609994729876, "total_flos": 4424289082197626880, "step": 1152400 }, { "loss": 2.755, "learning_rate": 7.328923514930792e-07, "epoch": 4.636502246842954, "total_flos": 4424679655018229760, "step": 1152500 }, { "loss": 2.81, "learning_rate": 7.320812260971486e-07, "epoch": 4.636904546387149, "total_flos": 4425050719646085120, "step": 1152600 }, { "loss": 2.82, "learning_rate": 7.31270100701218e-07, "epoch": 4.637306845931343, "total_flos": 4425433617721651200, "step": 1152700 }, { "loss": 2.7975, "learning_rate": 7.304589753052874e-07, "epoch": 4.637709145475538, "total_flos": 4425803227069132800, "step": 1152800 }, { "loss": 2.76, "learning_rate": 7.296478499093568e-07, "epoch": 4.6381114450197325, "total_flos": 4426174854688665600, "step": 1152900 }, { "loss": 2.78, "learning_rate": 7.288367245134262e-07, "epoch": 4.6385137445639275, "total_flos": 4426576591740456960, "step": 1153000 }, { "loss": 2.79, "learning_rate": 7.280255991174956e-07, "epoch": 4.638916044108122, "total_flos": 4426960780447887360, "step": 1153100 }, { "loss": 2.715, "learning_rate": 7.27214473721565e-07, "epoch": 4.639318343652317, "total_flos": 4427348777316003840, "step": 1153200 }, { "loss": 2.7625, "learning_rate": 7.264033483256344e-07, "epoch": 4.639720643196512, "total_flos": 4427727097100759040, "step": 1153300 }, { "loss": 2.77, "learning_rate": 7.255922229297038e-07, "epoch": 4.640122942740706, "total_flos": 4428110037666263040, "step": 1153400 }, { "loss": 2.745, "learning_rate": 7.247810975337732e-07, "epoch": 4.640525242284901, "total_flos": 4428493743050649600, "step": 1153500 }, { "loss": 2.7475, "learning_rate": 7.239699721378426e-07, "epoch": 4.640927541829095, "total_flos": 4428884560188395520, "step": 1153600 }, { "loss": 2.7575, "learning_rate": 7.23158846741912e-07, "epoch": 4.64132984137329, "total_flos": 4429279780345958400, "step": 1153700 }, { "loss": 2.8025, "learning_rate": 7.223477213459816e-07, "epoch": 4.641732140917484, "total_flos": 4429662593441648640, "step": 1153800 }, { "loss": 2.76, "learning_rate": 7.21536595950051e-07, "epoch": 4.642134440461679, "total_flos": 4430029988001116160, "step": 1153900 }, { "loss": 2.7825, "learning_rate": 7.207254705541204e-07, "epoch": 4.642536740005873, "total_flos": 4430405551251148800, "step": 1154000 }, { "loss": 2.755, "learning_rate": 7.199143451581898e-07, "epoch": 4.642939039550068, "total_flos": 4430798503508275200, "step": 1154100 }, { "loss": 2.7625, "learning_rate": 7.191032197622592e-07, "epoch": 4.643341339094263, "total_flos": 4431178915922472960, "step": 1154200 }, { "loss": 2.81, "learning_rate": 7.182920943663287e-07, "epoch": 4.643743638638457, "total_flos": 4431548647428526080, "step": 1154300 }, { "loss": 2.7525, "learning_rate": 7.174809689703981e-07, "epoch": 4.644145938182652, "total_flos": 4431935428022169600, "step": 1154400 }, { "loss": 2.75, "learning_rate": 7.166698435744675e-07, "epoch": 4.6445482377268466, "total_flos": 4432323106215751680, "step": 1154500 }, { "loss": 2.82, "learning_rate": 7.158587181785369e-07, "epoch": 4.644950537271042, "total_flos": 4432706641640386560, "step": 1154600 }, { "loss": 2.745, "learning_rate": 7.150475927826063e-07, "epoch": 4.645352836815236, "total_flos": 4433085524416819200, "step": 1154700 }, { "loss": 2.7925, "learning_rate": 7.142364673866757e-07, "epoch": 4.645755136359431, "total_flos": 4433478386382827520, "step": 1154800 }, { "loss": 2.82, "learning_rate": 7.134253419907452e-07, "epoch": 4.646157435903625, "total_flos": 4433858262361559040, "step": 1154900 }, { "loss": 2.745, "learning_rate": 7.126142165948146e-07, "epoch": 4.64655973544782, "total_flos": 4434234898482524160, "step": 1155000 }, { "loss": 2.8275, "learning_rate": 7.11803091198884e-07, "epoch": 4.646962034992014, "total_flos": 4434614705415106560, "step": 1155100 }, { "loss": 2.755, "learning_rate": 7.109919658029534e-07, "epoch": 4.647364334536209, "total_flos": 4435001857795706880, "step": 1155200 }, { "loss": 2.765, "learning_rate": 7.101808404070228e-07, "epoch": 4.647766634080403, "total_flos": 4435390603548979200, "step": 1155300 }, { "loss": 2.8125, "learning_rate": 7.093697150110922e-07, "epoch": 4.648168933624598, "total_flos": 4435778775688089600, "step": 1155400 }, { "loss": 2.7525, "learning_rate": 7.085585896151616e-07, "epoch": 4.648571233168793, "total_flos": 4436151146881536000, "step": 1155500 }, { "loss": 2.7975, "learning_rate": 7.07747464219231e-07, "epoch": 4.648973532712987, "total_flos": 4436538697605304320, "step": 1155600 }, { "loss": 2.7775, "learning_rate": 7.069363388233004e-07, "epoch": 4.649375832257182, "total_flos": 4436937524096348160, "step": 1155700 }, { "loss": 2.7725, "learning_rate": 7.061252134273698e-07, "epoch": 4.6497781318013764, "total_flos": 4437313023611473920, "step": 1155800 }, { "loss": 2.8325, "learning_rate": 7.053140880314392e-07, "epoch": 4.6501804313455715, "total_flos": 4437693409469460480, "step": 1155900 }, { "loss": 2.8075, "learning_rate": 7.045029626355086e-07, "epoch": 4.650582730889766, "total_flos": 4438089128883793920, "step": 1156000 }, { "loss": 2.795, "learning_rate": 7.036918372395782e-07, "epoch": 4.650985030433961, "total_flos": 4438485613117009920, "step": 1156100 }, { "loss": 2.7475, "learning_rate": 7.028807118436476e-07, "epoch": 4.651387329978155, "total_flos": 4438866439808102400, "step": 1156200 }, { "loss": 2.72, "learning_rate": 7.02069586447717e-07, "epoch": 4.65178962952235, "total_flos": 4439244579010621440, "step": 1156300 }, { "loss": 2.7975, "learning_rate": 7.012584610517864e-07, "epoch": 4.652191929066545, "total_flos": 4439617306057297920, "step": 1156400 }, { "loss": 2.75, "learning_rate": 7.004473356558558e-07, "epoch": 4.652594228610739, "total_flos": 4440006359862620160, "step": 1156500 }, { "loss": 2.81, "learning_rate": 6.996362102599252e-07, "epoch": 4.652996528154934, "total_flos": 4440381960291348480, "step": 1156600 }, { "loss": 2.83, "learning_rate": 6.988250848639946e-07, "epoch": 4.653398827699128, "total_flos": 4440760078248898560, "step": 1156700 }, { "loss": 2.765, "learning_rate": 6.98013959468064e-07, "epoch": 4.653801127243323, "total_flos": 4441137739439616000, "step": 1156800 }, { "loss": 2.7725, "learning_rate": 6.972028340721334e-07, "epoch": 4.654203426787517, "total_flos": 4441517440147353600, "step": 1156900 }, { "loss": 2.85, "learning_rate": 6.963917086762028e-07, "epoch": 4.654605726331712, "total_flos": 4441902446786088960, "step": 1157000 }, { "loss": 2.7925, "learning_rate": 6.955805832802723e-07, "epoch": 4.655008025875906, "total_flos": 4442289009618800640, "step": 1157100 }, { "loss": 2.7725, "learning_rate": 6.947694578843417e-07, "epoch": 4.655410325420101, "total_flos": 4442678095291576320, "step": 1157200 }, { "loss": 2.745, "learning_rate": 6.939583324884111e-07, "epoch": 4.655812624964296, "total_flos": 4443069167368949760, "step": 1157300 }, { "loss": 2.7375, "learning_rate": 6.931472070924806e-07, "epoch": 4.6562149245084905, "total_flos": 4443475663293788160, "step": 1157400 }, { "loss": 2.81, "learning_rate": 6.9233608169655e-07, "epoch": 4.6566172240526855, "total_flos": 4443859002202460160, "step": 1157500 }, { "loss": 2.765, "learning_rate": 6.915249563006194e-07, "epoch": 4.65701952359688, "total_flos": 4444241188571566080, "step": 1157600 }, { "loss": 2.79, "learning_rate": 6.907138309046888e-07, "epoch": 4.657421823141075, "total_flos": 4444619646448619520, "step": 1157700 }, { "loss": 2.81, "learning_rate": 6.899027055087582e-07, "epoch": 4.657824122685269, "total_flos": 4444994726375608320, "step": 1157800 }, { "loss": 2.8025, "learning_rate": 6.890915801128276e-07, "epoch": 4.658226422229464, "total_flos": 4445373359523655680, "step": 1157900 }, { "loss": 2.7725, "learning_rate": 6.88280454716897e-07, "epoch": 4.658628721773658, "total_flos": 4445747871147724800, "step": 1158000 }, { "loss": 2.81, "learning_rate": 6.874693293209664e-07, "epoch": 4.659031021317853, "total_flos": 4446121076206202880, "step": 1158100 }, { "loss": 2.815, "learning_rate": 6.866582039250358e-07, "epoch": 4.659433320862047, "total_flos": 4446502147214438400, "step": 1158200 }, { "loss": 2.785, "learning_rate": 6.858470785291052e-07, "epoch": 4.659835620406242, "total_flos": 4446898349951815680, "step": 1158300 }, { "loss": 2.73, "learning_rate": 6.850359531331746e-07, "epoch": 4.660237919950436, "total_flos": 4447286867321671680, "step": 1158400 }, { "loss": 2.8075, "learning_rate": 6.842248277372441e-07, "epoch": 4.660640219494631, "total_flos": 4447660205161205760, "step": 1158500 }, { "loss": 2.765, "learning_rate": 6.834137023413136e-07, "epoch": 4.661042519038826, "total_flos": 4448033957277634560, "step": 1158600 }, { "loss": 2.77, "learning_rate": 6.82602576945383e-07, "epoch": 4.66144481858302, "total_flos": 4448397490563993600, "step": 1158700 }, { "loss": 2.785, "learning_rate": 6.817914515494524e-07, "epoch": 4.661847118127215, "total_flos": 4448782709652418560, "step": 1158800 }, { "loss": 2.7725, "learning_rate": 6.809803261535218e-07, "epoch": 4.6622494176714095, "total_flos": 4449161210019409920, "step": 1158900 }, { "loss": 2.7825, "learning_rate": 6.801692007575912e-07, "epoch": 4.662651717215605, "total_flos": 4449541319692800000, "step": 1159000 }, { "loss": 2.7125, "learning_rate": 6.793580753616606e-07, "epoch": 4.663054016759799, "total_flos": 4449919565120163840, "step": 1159100 }, { "loss": 2.795, "learning_rate": 6.7854694996573e-07, "epoch": 4.663456316303994, "total_flos": 4450292610841374720, "step": 1159200 }, { "loss": 2.7375, "learning_rate": 6.777358245697994e-07, "epoch": 4.663858615848188, "total_flos": 4450673310062653440, "step": 1159300 }, { "loss": 2.825, "learning_rate": 6.769246991738688e-07, "epoch": 4.664260915392383, "total_flos": 4451060552734371840, "step": 1159400 }, { "loss": 2.795, "learning_rate": 6.761135737779382e-07, "epoch": 4.664663214936578, "total_flos": 4451448326530314240, "step": 1159500 }, { "loss": 2.78, "learning_rate": 6.753024483820076e-07, "epoch": 4.665065514480772, "total_flos": 4451822546036060160, "step": 1159600 }, { "loss": 2.7525, "learning_rate": 6.74491322986077e-07, "epoch": 4.665467814024967, "total_flos": 4452226030486548480, "step": 1159700 }, { "loss": 2.76, "learning_rate": 6.736801975901464e-07, "epoch": 4.665870113569161, "total_flos": 4452625149095915520, "step": 1159800 }, { "loss": 2.7625, "learning_rate": 6.72869072194216e-07, "epoch": 4.666272413113356, "total_flos": 4452993398765383680, "step": 1159900 }, { "loss": 2.685, "learning_rate": 6.720579467982854e-07, "epoch": 4.66667471265755, "total_flos": 4453380312140083200, "step": 1160000 }, { "loss": 2.76, "learning_rate": 6.712468214023548e-07, "epoch": 4.667077012201745, "total_flos": 4453762578177822720, "step": 1160100 }, { "loss": 2.7675, "learning_rate": 6.704356960064242e-07, "epoch": 4.667479311745939, "total_flos": 4454164559546757120, "step": 1160200 }, { "loss": 2.7625, "learning_rate": 6.696245706104936e-07, "epoch": 4.6678816112901345, "total_flos": 4454548079037665280, "step": 1160300 }, { "loss": 2.83, "learning_rate": 6.68813445214563e-07, "epoch": 4.6682839108343295, "total_flos": 4454930291962982400, "step": 1160400 }, { "loss": 2.7675, "learning_rate": 6.680023198186324e-07, "epoch": 4.668686210378524, "total_flos": 4455319659131596800, "step": 1160500 }, { "loss": 2.79, "learning_rate": 6.671911944227018e-07, "epoch": 4.669088509922719, "total_flos": 4455680765180252160, "step": 1160600 }, { "loss": 2.7725, "learning_rate": 6.663800690267712e-07, "epoch": 4.669490809466913, "total_flos": 4456058591019479040, "step": 1160700 }, { "loss": 2.7725, "learning_rate": 6.655689436308406e-07, "epoch": 4.669893109011108, "total_flos": 4456429294482862080, "step": 1160800 }, { "loss": 2.775, "learning_rate": 6.647578182349101e-07, "epoch": 4.670295408555302, "total_flos": 4456794622969098240, "step": 1160900 }, { "loss": 2.7575, "learning_rate": 6.639466928389795e-07, "epoch": 4.670697708099497, "total_flos": 4457171460917268480, "step": 1161000 }, { "loss": 2.79, "learning_rate": 6.63135567443049e-07, "epoch": 4.671100007643691, "total_flos": 4457548814055936000, "step": 1161100 }, { "loss": 2.7375, "learning_rate": 6.623244420471184e-07, "epoch": 4.671502307187886, "total_flos": 4457922406835097600, "step": 1161200 }, { "loss": 2.7975, "learning_rate": 6.615133166511878e-07, "epoch": 4.67190460673208, "total_flos": 4458282859600957440, "step": 1161300 }, { "loss": 2.745, "learning_rate": 6.607021912552572e-07, "epoch": 4.672306906276275, "total_flos": 4458677718594048000, "step": 1161400 }, { "loss": 2.825, "learning_rate": 6.598910658593266e-07, "epoch": 4.672709205820469, "total_flos": 4459064706326138880, "step": 1161500 }, { "loss": 2.7475, "learning_rate": 6.59079940463396e-07, "epoch": 4.673111505364664, "total_flos": 4459435946224988160, "step": 1161600 }, { "loss": 2.7575, "learning_rate": 6.582688150674654e-07, "epoch": 4.673513804908859, "total_flos": 4459829259646586880, "step": 1161700 }, { "loss": 2.75, "learning_rate": 6.574576896715348e-07, "epoch": 4.6739161044530535, "total_flos": 4460212051497308160, "step": 1161800 }, { "loss": 2.7825, "learning_rate": 6.566465642756042e-07, "epoch": 4.6743184039972485, "total_flos": 4460618781116805120, "step": 1161900 }, { "loss": 2.76, "learning_rate": 6.558354388796736e-07, "epoch": 4.674720703541443, "total_flos": 4461001440186470400, "step": 1162000 }, { "loss": 2.785, "learning_rate": 6.55024313483743e-07, "epoch": 4.675123003085638, "total_flos": 4461385012789800960, "step": 1162100 }, { "loss": 2.785, "learning_rate": 6.542131880878124e-07, "epoch": 4.675525302629832, "total_flos": 4461777157738106880, "step": 1162200 }, { "loss": 2.795, "learning_rate": 6.534020626918818e-07, "epoch": 4.675927602174027, "total_flos": 4462162716746035200, "step": 1162300 }, { "loss": 2.75, "learning_rate": 6.525909372959513e-07, "epoch": 4.676329901718221, "total_flos": 4462532974065070080, "step": 1162400 }, { "loss": 2.75, "learning_rate": 6.517798119000207e-07, "epoch": 4.676732201262416, "total_flos": 4462918272822128640, "step": 1162500 }, { "loss": 2.825, "learning_rate": 6.509686865040901e-07, "epoch": 4.677134500806611, "total_flos": 4463300113960488960, "step": 1162600 }, { "loss": 2.765, "learning_rate": 6.501575611081596e-07, "epoch": 4.677536800350805, "total_flos": 4463684711633571840, "step": 1162700 }, { "loss": 2.765, "learning_rate": 6.49346435712229e-07, "epoch": 4.677939099895, "total_flos": 4464069149969387520, "step": 1162800 }, { "loss": 2.78, "learning_rate": 6.485353103162984e-07, "epoch": 4.678341399439194, "total_flos": 4464453811377377280, "step": 1162900 }, { "loss": 2.735, "learning_rate": 6.477241849203678e-07, "epoch": 4.678743698983389, "total_flos": 4464842950162575360, "step": 1163000 }, { "loss": 2.7725, "learning_rate": 6.469130595244372e-07, "epoch": 4.679145998527583, "total_flos": 4465223925568450560, "step": 1163100 }, { "loss": 2.7975, "learning_rate": 6.461019341285066e-07, "epoch": 4.679548298071778, "total_flos": 4465607237920911360, "step": 1163200 }, { "loss": 2.7675, "learning_rate": 6.452908087325761e-07, "epoch": 4.6799505976159725, "total_flos": 4465987522865295360, "step": 1163300 }, { "loss": 2.815, "learning_rate": 6.444796833366455e-07, "epoch": 4.680352897160168, "total_flos": 4466386349356339200, "step": 1163400 }, { "loss": 2.795, "learning_rate": 6.43668557940715e-07, "epoch": 4.680755196704363, "total_flos": 4466787836779745280, "step": 1163500 }, { "loss": 2.755, "learning_rate": 6.428574325447844e-07, "epoch": 4.681157496248557, "total_flos": 4467155879310766080, "step": 1163600 }, { "loss": 2.775, "learning_rate": 6.420463071488538e-07, "epoch": 4.681559795792752, "total_flos": 4467539181040742400, "step": 1163700 }, { "loss": 2.7625, "learning_rate": 6.412351817529232e-07, "epoch": 4.681962095336946, "total_flos": 4467920958444195840, "step": 1163800 }, { "loss": 2.745, "learning_rate": 6.404240563569926e-07, "epoch": 4.682364394881141, "total_flos": 4468302810205040640, "step": 1163900 }, { "loss": 2.7725, "learning_rate": 6.39612930961062e-07, "epoch": 4.682766694425335, "total_flos": 4468682733984952320, "step": 1164000 }, { "loss": 2.765, "learning_rate": 6.388018055651314e-07, "epoch": 4.68316899396953, "total_flos": 4469055036132249600, "step": 1164100 }, { "loss": 2.6925, "learning_rate": 6.379906801692008e-07, "epoch": 4.683571293513724, "total_flos": 4469436601086013440, "step": 1164200 }, { "loss": 2.76, "learning_rate": 6.371795547732702e-07, "epoch": 4.683973593057919, "total_flos": 4469804542703431680, "step": 1164300 }, { "loss": 2.7875, "learning_rate": 6.363684293773396e-07, "epoch": 4.684375892602114, "total_flos": 4470184301834833920, "step": 1164400 }, { "loss": 2.77, "learning_rate": 6.35557303981409e-07, "epoch": 4.684778192146308, "total_flos": 4470558446983188480, "step": 1164500 }, { "loss": 2.75, "learning_rate": 6.347461785854784e-07, "epoch": 4.685180491690502, "total_flos": 4470954894037708800, "step": 1164600 }, { "loss": 2.7975, "learning_rate": 6.339350531895478e-07, "epoch": 4.6855827912346975, "total_flos": 4471336331521658880, "step": 1164700 }, { "loss": 2.7775, "learning_rate": 6.331239277936172e-07, "epoch": 4.6859850907788925, "total_flos": 4471718162037534720, "step": 1164800 }, { "loss": 2.7525, "learning_rate": 6.323128023976867e-07, "epoch": 4.686387390323087, "total_flos": 4472106153594408960, "step": 1164900 }, { "loss": 2.7725, "learning_rate": 6.315016770017561e-07, "epoch": 4.686789689867282, "total_flos": 4472481467216056320, "step": 1165000 }, { "loss": 2.77, "learning_rate": 6.306905516058255e-07, "epoch": 4.687191989411476, "total_flos": 4472869639355166720, "step": 1165100 }, { "loss": 2.77, "learning_rate": 6.298794262098949e-07, "epoch": 4.687594288955671, "total_flos": 4473255697619865600, "step": 1165200 }, { "loss": 2.7475, "learning_rate": 6.290683008139643e-07, "epoch": 4.687996588499865, "total_flos": 4473651799443640320, "step": 1165300 }, { "loss": 2.73, "learning_rate": 6.282571754180337e-07, "epoch": 4.68839888804406, "total_flos": 4474038149826662400, "step": 1165400 }, { "loss": 2.7325, "learning_rate": 6.274460500221031e-07, "epoch": 4.688801187588254, "total_flos": 4474433364672983040, "step": 1165500 }, { "loss": 2.8, "learning_rate": 6.266349246261727e-07, "epoch": 4.689203487132449, "total_flos": 4474801099151953920, "step": 1165600 }, { "loss": 2.7875, "learning_rate": 6.258237992302421e-07, "epoch": 4.689605786676644, "total_flos": 4475197710854983680, "step": 1165700 }, { "loss": 2.7325, "learning_rate": 6.250126738343115e-07, "epoch": 4.690008086220838, "total_flos": 4475584953526702080, "step": 1165800 }, { "loss": 2.795, "learning_rate": 6.242015484383808e-07, "epoch": 4.690410385765033, "total_flos": 4475965429675806720, "step": 1165900 }, { "loss": 2.775, "learning_rate": 6.233904230424502e-07, "epoch": 4.690812685309227, "total_flos": 4476351934084853760, "step": 1166000 }, { "loss": 2.78, "learning_rate": 6.225792976465197e-07, "epoch": 4.691214984853422, "total_flos": 4476726270437928960, "step": 1166100 }, { "loss": 2.7475, "learning_rate": 6.217681722505892e-07, "epoch": 4.6916172843976165, "total_flos": 4477103124319825920, "step": 1166200 }, { "loss": 2.73, "learning_rate": 6.209570468546586e-07, "epoch": 4.6920195839418115, "total_flos": 4477498535682109440, "step": 1166300 }, { "loss": 2.79, "learning_rate": 6.20145921458728e-07, "epoch": 4.692421883486006, "total_flos": 4477874571632701440, "step": 1166400 }, { "loss": 2.795, "learning_rate": 6.193347960627974e-07, "epoch": 4.692824183030201, "total_flos": 4478268963236474880, "step": 1166500 }, { "loss": 2.835, "learning_rate": 6.185236706668668e-07, "epoch": 4.693226482574396, "total_flos": 4478652753600737280, "step": 1166600 }, { "loss": 2.8225, "learning_rate": 6.177125452709362e-07, "epoch": 4.69362878211859, "total_flos": 4479043156461588480, "step": 1166700 }, { "loss": 2.78, "learning_rate": 6.169014198750056e-07, "epoch": 4.694031081662785, "total_flos": 4479429639625666560, "step": 1166800 }, { "loss": 2.7725, "learning_rate": 6.16090294479075e-07, "epoch": 4.694433381206979, "total_flos": 4479819644143349760, "step": 1166900 }, { "loss": 2.74, "learning_rate": 6.152791690831444e-07, "epoch": 4.694835680751174, "total_flos": 4480230070387445760, "step": 1167000 }, { "loss": 2.7825, "learning_rate": 6.144680436872138e-07, "epoch": 4.695237980295368, "total_flos": 4480607306678784000, "step": 1167100 }, { "loss": 2.8125, "learning_rate": 6.136569182912832e-07, "epoch": 4.695640279839563, "total_flos": 4480984171183165440, "step": 1167200 }, { "loss": 2.81, "learning_rate": 6.128457928953527e-07, "epoch": 4.696042579383757, "total_flos": 4481364843848232960, "step": 1167300 }, { "loss": 2.74, "learning_rate": 6.120346674994222e-07, "epoch": 4.696444878927952, "total_flos": 4481754837743431680, "step": 1167400 }, { "loss": 2.795, "learning_rate": 6.112235421034916e-07, "epoch": 4.696847178472147, "total_flos": 4482131213613527040, "step": 1167500 }, { "loss": 2.785, "learning_rate": 6.10412416707561e-07, "epoch": 4.697249478016341, "total_flos": 4482521945771397120, "step": 1167600 }, { "loss": 2.77, "learning_rate": 6.096012913116304e-07, "epoch": 4.6976517775605355, "total_flos": 4482907419799449600, "step": 1167700 }, { "loss": 2.7925, "learning_rate": 6.087901659156998e-07, "epoch": 4.6980540771047306, "total_flos": 4483292134319861760, "step": 1167800 }, { "loss": 2.745, "learning_rate": 6.079790405197692e-07, "epoch": 4.698456376648926, "total_flos": 4483688453904568320, "step": 1167900 }, { "loss": 2.7375, "learning_rate": 6.071679151238386e-07, "epoch": 4.69885867619312, "total_flos": 4484072116799016960, "step": 1168000 }, { "loss": 2.8125, "learning_rate": 6.06356789727908e-07, "epoch": 4.699260975737315, "total_flos": 4484439622894571520, "step": 1168100 }, { "loss": 2.745, "learning_rate": 6.055456643319774e-07, "epoch": 4.699663275281509, "total_flos": 4484820906352496640, "step": 1168200 }, { "loss": 2.7575, "learning_rate": 6.047345389360468e-07, "epoch": 4.700065574825704, "total_flos": 4485221958254039040, "step": 1168300 }, { "loss": 2.8125, "learning_rate": 6.039234135401162e-07, "epoch": 4.700467874369898, "total_flos": 4485592619227484160, "step": 1168400 }, { "loss": 2.86, "learning_rate": 6.031122881441856e-07, "epoch": 4.700870173914093, "total_flos": 4485978847451934720, "step": 1168500 }, { "loss": 2.7625, "learning_rate": 6.023011627482552e-07, "epoch": 4.701272473458287, "total_flos": 4486367901257256960, "step": 1168600 }, { "loss": 2.845, "learning_rate": 6.014900373523246e-07, "epoch": 4.701674773002482, "total_flos": 4486752605155184640, "step": 1168700 }, { "loss": 2.7325, "learning_rate": 6.00678911956394e-07, "epoch": 4.702077072546677, "total_flos": 4487148080252375040, "step": 1168800 }, { "loss": 2.7625, "learning_rate": 5.998677865604634e-07, "epoch": 4.702479372090871, "total_flos": 4487533687061483520, "step": 1168900 }, { "loss": 2.7675, "learning_rate": 5.990566611645328e-07, "epoch": 4.702881671635066, "total_flos": 4487913031915991040, "step": 1169000 }, { "loss": 2.7225, "learning_rate": 5.982455357686022e-07, "epoch": 4.7032839711792604, "total_flos": 4488306605588459520, "step": 1169100 }, { "loss": 2.7275, "learning_rate": 5.974344103726716e-07, "epoch": 4.7036862707234555, "total_flos": 4488693667677941760, "step": 1169200 }, { "loss": 2.86, "learning_rate": 5.96623284976741e-07, "epoch": 4.70408857026765, "total_flos": 4489068471420334080, "step": 1169300 }, { "loss": 2.7775, "learning_rate": 5.958121595808104e-07, "epoch": 4.704490869811845, "total_flos": 4489448761675960320, "step": 1169400 }, { "loss": 2.765, "learning_rate": 5.950010341848798e-07, "epoch": 4.704893169356039, "total_flos": 4489836477048238080, "step": 1169500 }, { "loss": 2.765, "learning_rate": 5.941899087889492e-07, "epoch": 4.705295468900234, "total_flos": 4490232573560770560, "step": 1169600 }, { "loss": 2.7825, "learning_rate": 5.933787833930186e-07, "epoch": 4.705697768444429, "total_flos": 4490611344801116160, "step": 1169700 }, { "loss": 2.8075, "learning_rate": 5.925676579970882e-07, "epoch": 4.706100067988623, "total_flos": 4491017368025395200, "step": 1169800 }, { "loss": 2.7825, "learning_rate": 5.917565326011576e-07, "epoch": 4.706502367532818, "total_flos": 4491391571597414400, "step": 1169900 }, { "loss": 2.79, "learning_rate": 5.90945407205227e-07, "epoch": 4.706904667077012, "total_flos": 4491776567613665280, "step": 1170000 }, { "loss": 2.7725, "learning_rate": 5.901342818092964e-07, "epoch": 4.707306966621207, "total_flos": 4492172398564085760, "step": 1170100 }, { "loss": 2.8175, "learning_rate": 5.893231564133658e-07, "epoch": 4.707709266165401, "total_flos": 4492549045307535360, "step": 1170200 }, { "loss": 2.7975, "learning_rate": 5.885120310174352e-07, "epoch": 4.708111565709596, "total_flos": 4492931853091983360, "step": 1170300 }, { "loss": 2.7875, "learning_rate": 5.877009056215046e-07, "epoch": 4.70851386525379, "total_flos": 4493313194973573120, "step": 1170400 }, { "loss": 2.8125, "learning_rate": 5.86889780225574e-07, "epoch": 4.708916164797985, "total_flos": 4493683643497328640, "step": 1170500 }, { "loss": 2.755, "learning_rate": 5.860786548296434e-07, "epoch": 4.70931846434218, "total_flos": 4494065043802583040, "step": 1170600 }, { "loss": 2.7925, "learning_rate": 5.852675294337128e-07, "epoch": 4.7097207638863745, "total_flos": 4494468129909903360, "step": 1170700 }, { "loss": 2.76, "learning_rate": 5.844564040377822e-07, "epoch": 4.710123063430569, "total_flos": 4494863084505354240, "step": 1170800 }, { "loss": 2.795, "learning_rate": 5.836452786418516e-07, "epoch": 4.710525362974764, "total_flos": 4495231806875381760, "step": 1170900 }, { "loss": 2.765, "learning_rate": 5.828341532459212e-07, "epoch": 4.710927662518959, "total_flos": 4495635721536491520, "step": 1171000 }, { "loss": 2.8075, "learning_rate": 5.820230278499906e-07, "epoch": 4.711329962063153, "total_flos": 4496031722446663680, "step": 1171100 }, { "loss": 2.77, "learning_rate": 5.8121190245406e-07, "epoch": 4.711732261607348, "total_flos": 4496418981052108800, "step": 1171200 }, { "loss": 2.8, "learning_rate": 5.804007770581294e-07, "epoch": 4.712134561151542, "total_flos": 4496800657541959680, "step": 1171300 }, { "loss": 2.6875, "learning_rate": 5.795896516621988e-07, "epoch": 4.712536860695737, "total_flos": 4497178063793049600, "step": 1171400 }, { "loss": 2.795, "learning_rate": 5.787785262662682e-07, "epoch": 4.712939160239931, "total_flos": 4497553515506995200, "step": 1171500 }, { "loss": 2.78, "learning_rate": 5.779674008703376e-07, "epoch": 4.713341459784126, "total_flos": 4497919497276026880, "step": 1171600 }, { "loss": 2.765, "learning_rate": 5.77156275474407e-07, "epoch": 4.71374375932832, "total_flos": 4498304158684016640, "step": 1171700 }, { "loss": 2.7675, "learning_rate": 5.763451500784764e-07, "epoch": 4.714146058872515, "total_flos": 4498679360769576960, "step": 1171800 }, { "loss": 2.7, "learning_rate": 5.755340246825458e-07, "epoch": 4.71454835841671, "total_flos": 4499057653998120960, "step": 1171900 }, { "loss": 2.795, "learning_rate": 5.747228992866152e-07, "epoch": 4.714950657960904, "total_flos": 4499453022870466560, "step": 1172000 }, { "loss": 2.775, "learning_rate": 5.739117738906847e-07, "epoch": 4.715352957505099, "total_flos": 4499844254285107200, "step": 1172100 }, { "loss": 2.77, "learning_rate": 5.731006484947542e-07, "epoch": 4.7157552570492935, "total_flos": 4500225973264896000, "step": 1172200 }, { "loss": 2.7925, "learning_rate": 5.722895230988236e-07, "epoch": 4.716157556593489, "total_flos": 4500609922966425600, "step": 1172300 }, { "loss": 2.79, "learning_rate": 5.71478397702893e-07, "epoch": 4.716559856137683, "total_flos": 4501000506409512960, "step": 1172400 }, { "loss": 2.725, "learning_rate": 5.706672723069624e-07, "epoch": 4.716962155681878, "total_flos": 4501393150614589440, "step": 1172500 }, { "loss": 2.7925, "learning_rate": 5.698561469110318e-07, "epoch": 4.717364455226072, "total_flos": 4501788769115320320, "step": 1172600 }, { "loss": 2.7875, "learning_rate": 5.690450215151012e-07, "epoch": 4.717766754770267, "total_flos": 4502176601334927360, "step": 1172700 }, { "loss": 2.7875, "learning_rate": 5.682338961191706e-07, "epoch": 4.718169054314462, "total_flos": 4502551792798003200, "step": 1172800 }, { "loss": 2.735, "learning_rate": 5.6742277072324e-07, "epoch": 4.718571353858656, "total_flos": 4502943029523886080, "step": 1172900 }, { "loss": 2.7675, "learning_rate": 5.666116453273094e-07, "epoch": 4.718973653402851, "total_flos": 4503329294927032320, "step": 1173000 }, { "loss": 2.805, "learning_rate": 5.658005199313788e-07, "epoch": 4.719375952947045, "total_flos": 4503704932534456320, "step": 1173100 }, { "loss": 2.7425, "learning_rate": 5.649893945354482e-07, "epoch": 4.71977825249124, "total_flos": 4504087426955612160, "step": 1173200 }, { "loss": 2.8425, "learning_rate": 5.641782691395177e-07, "epoch": 4.720180552035434, "total_flos": 4504463994030428160, "step": 1173300 }, { "loss": 2.7175, "learning_rate": 5.633671437435872e-07, "epoch": 4.720582851579629, "total_flos": 4504841825180897280, "step": 1173400 }, { "loss": 2.7375, "learning_rate": 5.625560183476566e-07, "epoch": 4.720985151123823, "total_flos": 4505219953760931840, "step": 1173500 }, { "loss": 2.755, "learning_rate": 5.61744892951726e-07, "epoch": 4.7213874506680185, "total_flos": 4505617383395266560, "step": 1173600 }, { "loss": 2.785, "learning_rate": 5.609337675557954e-07, "epoch": 4.7217897502122135, "total_flos": 4506001816419840000, "step": 1173700 }, { "loss": 2.755, "learning_rate": 5.601226421598648e-07, "epoch": 4.722192049756408, "total_flos": 4506378521586954240, "step": 1173800 }, { "loss": 2.77, "learning_rate": 5.593115167639342e-07, "epoch": 4.722594349300603, "total_flos": 4506756278380032000, "step": 1173900 }, { "loss": 2.6875, "learning_rate": 5.585003913680036e-07, "epoch": 4.722996648844797, "total_flos": 4507153081287782400, "step": 1174000 }, { "loss": 2.84, "learning_rate": 5.57689265972073e-07, "epoch": 4.723398948388992, "total_flos": 4507541035665960960, "step": 1174100 }, { "loss": 2.7625, "learning_rate": 5.568781405761424e-07, "epoch": 4.723801247933186, "total_flos": 4507929807975444480, "step": 1174200 }, { "loss": 2.7225, "learning_rate": 5.560670151802118e-07, "epoch": 4.724203547477381, "total_flos": 4508312589203681280, "step": 1174300 }, { "loss": 2.8275, "learning_rate": 5.552558897842812e-07, "epoch": 4.724605847021575, "total_flos": 4508699321996144640, "step": 1174400 }, { "loss": 2.7875, "learning_rate": 5.544447643883507e-07, "epoch": 4.72500814656577, "total_flos": 4509077907343011840, "step": 1174500 }, { "loss": 2.7775, "learning_rate": 5.536336389924201e-07, "epoch": 4.725410446109964, "total_flos": 4509477636745236480, "step": 1174600 }, { "loss": 2.7875, "learning_rate": 5.528225135964896e-07, "epoch": 4.725812745654159, "total_flos": 4509852100568125440, "step": 1174700 }, { "loss": 2.7325, "learning_rate": 5.52011388200559e-07, "epoch": 4.726215045198353, "total_flos": 4510225358739025920, "step": 1174800 }, { "loss": 2.7225, "learning_rate": 5.512002628046284e-07, "epoch": 4.726617344742548, "total_flos": 4510624732288020480, "step": 1174900 }, { "loss": 2.7975, "learning_rate": 5.503891374086978e-07, "epoch": 4.727019644286743, "total_flos": 4511014673070796800, "step": 1175000 }, { "loss": 2.7625, "learning_rate": 5.495780120127672e-07, "epoch": 4.7274219438309375, "total_flos": 4511378349760696320, "step": 1175100 }, { "loss": 2.7475, "learning_rate": 5.487668866168366e-07, "epoch": 4.7278242433751325, "total_flos": 4511761364683591680, "step": 1175200 }, { "loss": 2.7825, "learning_rate": 5.47955761220906e-07, "epoch": 4.728226542919327, "total_flos": 4512147343279656960, "step": 1175300 }, { "loss": 2.745, "learning_rate": 5.471446358249754e-07, "epoch": 4.728628842463522, "total_flos": 4512524972602920960, "step": 1175400 }, { "loss": 2.7625, "learning_rate": 5.463335104290448e-07, "epoch": 4.729031142007716, "total_flos": 4512897635914690560, "step": 1175500 }, { "loss": 2.745, "learning_rate": 5.455223850331142e-07, "epoch": 4.729433441551911, "total_flos": 4513281474080133120, "step": 1175600 }, { "loss": 2.8025, "learning_rate": 5.447112596371837e-07, "epoch": 4.729835741096105, "total_flos": 4513659634527621120, "step": 1175700 }, { "loss": 2.7975, "learning_rate": 5.439001342412531e-07, "epoch": 4.7302380406403, "total_flos": 4514046117691699200, "step": 1175800 }, { "loss": 2.78, "learning_rate": 5.430890088453226e-07, "epoch": 4.730640340184495, "total_flos": 4514428643980308480, "step": 1175900 }, { "loss": 2.7825, "learning_rate": 5.42277883449392e-07, "epoch": 4.731042639728689, "total_flos": 4514811106534010880, "step": 1176000 }, { "loss": 2.7525, "learning_rate": 5.414667580534614e-07, "epoch": 4.731444939272884, "total_flos": 4515198386384424960, "step": 1176100 }, { "loss": 2.765, "learning_rate": 5.406556326575308e-07, "epoch": 4.731847238817078, "total_flos": 4515578214561976320, "step": 1176200 }, { "loss": 2.765, "learning_rate": 5.398445072616002e-07, "epoch": 4.732249538361273, "total_flos": 4515960092879032320, "step": 1176300 }, { "loss": 2.77, "learning_rate": 5.390333818656696e-07, "epoch": 4.732651837905467, "total_flos": 4516333659101982720, "step": 1176400 }, { "loss": 2.8225, "learning_rate": 5.38222256469739e-07, "epoch": 4.733054137449662, "total_flos": 4516719202176184320, "step": 1176500 }, { "loss": 2.73, "learning_rate": 5.374111310738084e-07, "epoch": 4.7334564369938565, "total_flos": 4517102748223303680, "step": 1176600 }, { "loss": 2.775, "learning_rate": 5.366000056778778e-07, "epoch": 4.733858736538052, "total_flos": 4517484706208993280, "step": 1176700 }, { "loss": 2.7575, "learning_rate": 5.357888802819472e-07, "epoch": 4.734261036082247, "total_flos": 4517865134556917760, "step": 1176800 }, { "loss": 2.87, "learning_rate": 5.349777548860166e-07, "epoch": 4.734663335626441, "total_flos": 4518253912177643520, "step": 1176900 }, { "loss": 2.85, "learning_rate": 5.34166629490086e-07, "epoch": 4.735065635170636, "total_flos": 4518654124902912000, "step": 1177000 }, { "loss": 2.775, "learning_rate": 5.333555040941555e-07, "epoch": 4.73546793471483, "total_flos": 4519046179560099840, "step": 1177100 }, { "loss": 2.785, "learning_rate": 5.32544378698225e-07, "epoch": 4.735870234259025, "total_flos": 4519424600258457600, "step": 1177200 }, { "loss": 2.7425, "learning_rate": 5.317332533022944e-07, "epoch": 4.736272533803219, "total_flos": 4519810589477007360, "step": 1177300 }, { "loss": 2.7375, "learning_rate": 5.309221279063638e-07, "epoch": 4.736674833347414, "total_flos": 4520194162080337920, "step": 1177400 }, { "loss": 2.8075, "learning_rate": 5.301110025104332e-07, "epoch": 4.737077132891608, "total_flos": 4520575429604536320, "step": 1177500 }, { "loss": 2.7975, "learning_rate": 5.292998771145026e-07, "epoch": 4.737479432435803, "total_flos": 4520972726457815040, "step": 1177600 }, { "loss": 2.77, "learning_rate": 5.28488751718572e-07, "epoch": 4.737881731979997, "total_flos": 4521363363013324800, "step": 1177700 }, { "loss": 2.74, "learning_rate": 5.276776263226414e-07, "epoch": 4.738284031524192, "total_flos": 4521733848715776000, "step": 1177800 }, { "loss": 2.76, "learning_rate": 5.268665009267108e-07, "epoch": 4.738686331068386, "total_flos": 4522116635255255040, "step": 1177900 }, { "loss": 2.77, "learning_rate": 5.260553755307802e-07, "epoch": 4.7390886306125815, "total_flos": 4522522047686676480, "step": 1178000 }, { "loss": 2.8, "learning_rate": 5.252442501348496e-07, "epoch": 4.7394909301567765, "total_flos": 4522923790049710080, "step": 1178100 }, { "loss": 2.795, "learning_rate": 5.24433124738919e-07, "epoch": 4.739893229700971, "total_flos": 4523307946889687040, "step": 1178200 }, { "loss": 2.78, "learning_rate": 5.236219993429884e-07, "epoch": 4.740295529245166, "total_flos": 4523695163005194240, "step": 1178300 }, { "loss": 2.75, "learning_rate": 5.228108739470578e-07, "epoch": 4.74069782878936, "total_flos": 4524084785113436160, "step": 1178400 }, { "loss": 2.745, "learning_rate": 5.219997485511274e-07, "epoch": 4.741100128333555, "total_flos": 4524482522799820800, "step": 1178500 }, { "loss": 2.7725, "learning_rate": 5.211886231551968e-07, "epoch": 4.741502427877749, "total_flos": 4524864682612715520, "step": 1178600 }, { "loss": 2.7825, "learning_rate": 5.203774977592662e-07, "epoch": 4.741904727421944, "total_flos": 4525257358685245440, "step": 1178700 }, { "loss": 2.775, "learning_rate": 5.195663723633356e-07, "epoch": 4.742307026966138, "total_flos": 4525641451790315520, "step": 1178800 }, { "loss": 2.815, "learning_rate": 5.18755246967405e-07, "epoch": 4.742709326510333, "total_flos": 4526025470537994240, "step": 1178900 }, { "loss": 2.79, "learning_rate": 5.179441215714744e-07, "epoch": 4.743111626054528, "total_flos": 4526414619945676800, "step": 1179000 }, { "loss": 2.815, "learning_rate": 5.171329961755438e-07, "epoch": 4.743513925598722, "total_flos": 4526788759782789120, "step": 1179100 }, { "loss": 2.81, "learning_rate": 5.163218707796132e-07, "epoch": 4.743916225142917, "total_flos": 4527156754512629760, "step": 1179200 }, { "loss": 2.7625, "learning_rate": 5.155107453836826e-07, "epoch": 4.744318524687111, "total_flos": 4527537639627386880, "step": 1179300 }, { "loss": 2.78, "learning_rate": 5.14699619987752e-07, "epoch": 4.744720824231306, "total_flos": 4527932849162465280, "step": 1179400 }, { "loss": 2.74, "learning_rate": 5.138884945918214e-07, "epoch": 4.7451231237755005, "total_flos": 4528316682016665600, "step": 1179500 }, { "loss": 2.7925, "learning_rate": 5.130773691958908e-07, "epoch": 4.7455254233196955, "total_flos": 4528690949323591680, "step": 1179600 }, { "loss": 2.8475, "learning_rate": 5.122662437999603e-07, "epoch": 4.74592772286389, "total_flos": 4529081978911027200, "step": 1179700 }, { "loss": 2.79, "learning_rate": 5.114551184040297e-07, "epoch": 4.746330022408085, "total_flos": 4529451811330682880, "step": 1179800 }, { "loss": 2.7475, "learning_rate": 5.106439930080992e-07, "epoch": 4.74673232195228, "total_flos": 4529843472955944960, "step": 1179900 }, { "loss": 2.8125, "learning_rate": 5.098328676121686e-07, "epoch": 4.747134621496474, "total_flos": 4530243154556989440, "step": 1180000 }, { "loss": 2.7675, "learning_rate": 5.09021742216238e-07, "epoch": 4.747536921040669, "total_flos": 4530642618397102080, "step": 1180100 }, { "loss": 2.745, "learning_rate": 5.082106168203074e-07, "epoch": 4.747939220584863, "total_flos": 4531023285750927360, "step": 1180200 }, { "loss": 2.7875, "learning_rate": 5.073994914243768e-07, "epoch": 4.748341520129058, "total_flos": 4531395651633131520, "step": 1180300 }, { "loss": 2.7725, "learning_rate": 5.065883660284462e-07, "epoch": 4.748743819673252, "total_flos": 4531788657002680320, "step": 1180400 }, { "loss": 2.7775, "learning_rate": 5.057772406325156e-07, "epoch": 4.749146119217447, "total_flos": 4532182772421857280, "step": 1180500 }, { "loss": 2.8, "learning_rate": 5.04966115236585e-07, "epoch": 4.749548418761641, "total_flos": 4532564305508167680, "step": 1180600 }, { "loss": 2.775, "learning_rate": 5.041549898406544e-07, "epoch": 4.749950718305836, "total_flos": 4532942030433792000, "step": 1180700 }, { "loss": 2.7925, "learning_rate": 5.033438644447238e-07, "epoch": 4.75035301785003, "total_flos": 4533315516988108800, "step": 1180800 }, { "loss": 2.7525, "learning_rate": 5.025327390487933e-07, "epoch": 4.750755317394225, "total_flos": 4533701176909639680, "step": 1180900 }, { "loss": 2.7675, "learning_rate": 5.017216136528627e-07, "epoch": 4.7511576169384195, "total_flos": 4534083713820733440, "step": 1181000 }, { "loss": 2.7925, "learning_rate": 5.009104882569321e-07, "epoch": 4.7515599164826146, "total_flos": 4534458167021137920, "step": 1181100 }, { "loss": 2.7325, "learning_rate": 5.000993628610015e-07, "epoch": 4.75196221602681, "total_flos": 4534841590909685760, "step": 1181200 }, { "loss": 2.75, "learning_rate": 4.99288237465071e-07, "epoch": 4.752364515571004, "total_flos": 4535245240008683520, "step": 1181300 }, { "loss": 2.7425, "learning_rate": 4.984771120691404e-07, "epoch": 4.752766815115199, "total_flos": 4535648150845009920, "step": 1181400 }, { "loss": 2.735, "learning_rate": 4.976659866732098e-07, "epoch": 4.753169114659393, "total_flos": 4536018870242119680, "step": 1181500 }, { "loss": 2.735, "learning_rate": 4.968548612772792e-07, "epoch": 4.753571414203588, "total_flos": 4536383194903572480, "step": 1181600 }, { "loss": 2.73, "learning_rate": 4.960437358813486e-07, "epoch": 4.753973713747782, "total_flos": 4536746712256204800, "step": 1181700 }, { "loss": 2.825, "learning_rate": 4.95232610485418e-07, "epoch": 4.754376013291977, "total_flos": 4537122004632883200, "step": 1181800 }, { "loss": 2.7475, "learning_rate": 4.944214850894874e-07, "epoch": 4.754778312836171, "total_flos": 4537513002352865280, "step": 1181900 }, { "loss": 2.7625, "learning_rate": 4.936103596935568e-07, "epoch": 4.755180612380366, "total_flos": 4537914447286333440, "step": 1182000 }, { "loss": 2.795, "learning_rate": 4.927992342976262e-07, "epoch": 4.755582911924561, "total_flos": 4538303894123581440, "step": 1182100 }, { "loss": 2.7675, "learning_rate": 4.919881089016957e-07, "epoch": 4.755985211468755, "total_flos": 4538680015054049280, "step": 1182200 }, { "loss": 2.785, "learning_rate": 4.911769835057651e-07, "epoch": 4.75638751101295, "total_flos": 4539057495662530560, "step": 1182300 }, { "loss": 2.8425, "learning_rate": 4.903658581098345e-07, "epoch": 4.7567898105571444, "total_flos": 4539443272431390720, "step": 1182400 }, { "loss": 2.705, "learning_rate": 4.895547327139039e-07, "epoch": 4.7571921101013395, "total_flos": 4539825660627701760, "step": 1182500 }, { "loss": 2.81, "learning_rate": 4.887436073179733e-07, "epoch": 4.757594409645534, "total_flos": 4540224173755453440, "step": 1182600 }, { "loss": 2.77, "learning_rate": 4.879324819220428e-07, "epoch": 4.757996709189729, "total_flos": 4540592657119580160, "step": 1182700 }, { "loss": 2.795, "learning_rate": 4.871213565261122e-07, "epoch": 4.758399008733923, "total_flos": 4540978609159434240, "step": 1182800 }, { "loss": 2.7825, "learning_rate": 4.863102311301816e-07, "epoch": 4.758801308278118, "total_flos": 4541356928944189440, "step": 1182900 }, { "loss": 2.765, "learning_rate": 4.85499105734251e-07, "epoch": 4.759203607822313, "total_flos": 4541734356440248320, "step": 1183000 }, { "loss": 2.7375, "learning_rate": 4.846879803383204e-07, "epoch": 4.759605907366507, "total_flos": 4542124860214702080, "step": 1183100 }, { "loss": 2.8, "learning_rate": 4.838768549423898e-07, "epoch": 4.760008206910702, "total_flos": 4542534824380723200, "step": 1183200 }, { "loss": 2.7925, "learning_rate": 4.830657295464592e-07, "epoch": 4.760410506454896, "total_flos": 4542928929177415680, "step": 1183300 }, { "loss": 2.7125, "learning_rate": 4.822546041505287e-07, "epoch": 4.760812805999091, "total_flos": 4543317324388700160, "step": 1183400 }, { "loss": 2.7775, "learning_rate": 4.814434787545981e-07, "epoch": 4.761215105543285, "total_flos": 4543707451064954880, "step": 1183500 }, { "loss": 2.7875, "learning_rate": 4.806323533586675e-07, "epoch": 4.76161740508748, "total_flos": 4544079556696289280, "step": 1183600 }, { "loss": 2.7625, "learning_rate": 4.798212279627369e-07, "epoch": 4.762019704631674, "total_flos": 4544465662762168320, "step": 1183700 }, { "loss": 2.7375, "learning_rate": 4.790101025668063e-07, "epoch": 4.762422004175869, "total_flos": 4544852284018544640, "step": 1183800 }, { "loss": 2.7475, "learning_rate": 4.781989771708757e-07, "epoch": 4.762824303720064, "total_flos": 4545243478254489600, "step": 1183900 }, { "loss": 2.745, "learning_rate": 4.773878517749452e-07, "epoch": 4.7632266032642585, "total_flos": 4545637476826337280, "step": 1184000 }, { "loss": 2.76, "learning_rate": 4.765767263790146e-07, "epoch": 4.763628902808453, "total_flos": 4546039660022476800, "step": 1184100 }, { "loss": 2.815, "learning_rate": 4.75765600983084e-07, "epoch": 4.764031202352648, "total_flos": 4546407596328652800, "step": 1184200 }, { "loss": 2.7475, "learning_rate": 4.7495447558715343e-07, "epoch": 4.764433501896843, "total_flos": 4546795550706831360, "step": 1184300 }, { "loss": 2.765, "learning_rate": 4.7414335019122283e-07, "epoch": 4.764835801441037, "total_flos": 4547164703287480320, "step": 1184400 }, { "loss": 2.7575, "learning_rate": 4.733322247952923e-07, "epoch": 4.765238100985232, "total_flos": 4547564305219891200, "step": 1184500 }, { "loss": 2.805, "learning_rate": 4.725210993993617e-07, "epoch": 4.765640400529426, "total_flos": 4547951075191050240, "step": 1184600 }, { "loss": 2.795, "learning_rate": 4.717099740034311e-07, "epoch": 4.766042700073621, "total_flos": 4548328948831457280, "step": 1184700 }, { "loss": 2.7725, "learning_rate": 4.708988486075005e-07, "epoch": 4.766444999617815, "total_flos": 4548721019422371840, "step": 1184800 }, { "loss": 2.8175, "learning_rate": 4.700877232115699e-07, "epoch": 4.76684729916201, "total_flos": 4549107948730798080, "step": 1184900 }, { "loss": 2.6975, "learning_rate": 4.6927659781563933e-07, "epoch": 4.767249598706204, "total_flos": 4549510663051161600, "step": 1185000 }, { "loss": 2.735, "learning_rate": 4.6846547241970874e-07, "epoch": 4.767651898250399, "total_flos": 4549909346138664960, "step": 1185100 }, { "loss": 2.7825, "learning_rate": 4.676543470237782e-07, "epoch": 4.768054197794594, "total_flos": 4550271169205022720, "step": 1185200 }, { "loss": 2.72, "learning_rate": 4.668432216278476e-07, "epoch": 4.768456497338788, "total_flos": 4550658645571399680, "step": 1185300 }, { "loss": 2.7925, "learning_rate": 4.66032096231917e-07, "epoch": 4.768858796882983, "total_flos": 4551045134046720000, "step": 1185400 }, { "loss": 2.7275, "learning_rate": 4.652209708359864e-07, "epoch": 4.7692610964271775, "total_flos": 4551412188686684160, "step": 1185500 }, { "loss": 2.7975, "learning_rate": 4.6440984544005583e-07, "epoch": 4.769663395971373, "total_flos": 4551802453455237120, "step": 1185600 }, { "loss": 2.75, "learning_rate": 4.6359872004412524e-07, "epoch": 4.770065695515567, "total_flos": 4552206399983800320, "step": 1185700 }, { "loss": 2.795, "learning_rate": 4.6278759464819465e-07, "epoch": 4.770467995059762, "total_flos": 4552610420869754880, "step": 1185800 }, { "loss": 2.755, "learning_rate": 4.619764692522641e-07, "epoch": 4.770870294603956, "total_flos": 4552991741506375680, "step": 1185900 }, { "loss": 2.7925, "learning_rate": 4.611653438563335e-07, "epoch": 4.771272594148151, "total_flos": 4553376556940390400, "step": 1186000 }, { "loss": 2.835, "learning_rate": 4.603542184604029e-07, "epoch": 4.771674893692346, "total_flos": 4553761308639498240, "step": 1186100 }, { "loss": 2.84, "learning_rate": 4.5954309306447233e-07, "epoch": 4.77207719323654, "total_flos": 4554132553849589760, "step": 1186200 }, { "loss": 2.7825, "learning_rate": 4.5873196766854174e-07, "epoch": 4.772479492780735, "total_flos": 4554514166604533760, "step": 1186300 }, { "loss": 2.765, "learning_rate": 4.579208422726112e-07, "epoch": 4.772881792324929, "total_flos": 4554903799335260160, "step": 1186400 }, { "loss": 2.775, "learning_rate": 4.571097168766806e-07, "epoch": 4.773284091869124, "total_flos": 4555288970622504960, "step": 1186500 }, { "loss": 2.775, "learning_rate": 4.5629859148075e-07, "epoch": 4.773686391413318, "total_flos": 4555676431055155200, "step": 1186600 }, { "loss": 2.7225, "learning_rate": 4.554874660848194e-07, "epoch": 4.774088690957513, "total_flos": 4556045031266611200, "step": 1186700 }, { "loss": 2.74, "learning_rate": 4.5467634068888883e-07, "epoch": 4.774490990501707, "total_flos": 4556436613223239680, "step": 1186800 }, { "loss": 2.7975, "learning_rate": 4.5386521529295824e-07, "epoch": 4.7748932900459025, "total_flos": 4556807603493703680, "step": 1186900 }, { "loss": 2.7975, "learning_rate": 4.5305408989702764e-07, "epoch": 4.7752955895900975, "total_flos": 4557178811525099520, "step": 1187000 }, { "loss": 2.72, "learning_rate": 4.5224296450109705e-07, "epoch": 4.775697889134292, "total_flos": 4557556632053084160, "step": 1187100 }, { "loss": 2.7375, "learning_rate": 4.5143183910516646e-07, "epoch": 4.776100188678486, "total_flos": 4557938887468339200, "step": 1187200 }, { "loss": 2.7975, "learning_rate": 4.5062071370923587e-07, "epoch": 4.776502488222681, "total_flos": 4558320494912040960, "step": 1187300 }, { "loss": 2.7175, "learning_rate": 4.4980958831330533e-07, "epoch": 4.776904787766876, "total_flos": 4558688255947223040, "step": 1187400 }, { "loss": 2.705, "learning_rate": 4.4899846291737474e-07, "epoch": 4.77730708731107, "total_flos": 4559066458884648960, "step": 1187500 }, { "loss": 2.7525, "learning_rate": 4.481873375214442e-07, "epoch": 4.777709386855265, "total_flos": 4559453058896056320, "step": 1187600 }, { "loss": 2.8225, "learning_rate": 4.473762121255136e-07, "epoch": 4.778111686399459, "total_flos": 4559843509558087680, "step": 1187700 }, { "loss": 2.765, "learning_rate": 4.46565086729583e-07, "epoch": 4.778513985943654, "total_flos": 4560218918782095360, "step": 1187800 }, { "loss": 2.805, "learning_rate": 4.457539613336524e-07, "epoch": 4.778916285487848, "total_flos": 4560597158898216960, "step": 1187900 }, { "loss": 2.7825, "learning_rate": 4.449428359377218e-07, "epoch": 4.779318585032043, "total_flos": 4560962821992714240, "step": 1188000 }, { "loss": 2.7625, "learning_rate": 4.4413171054179123e-07, "epoch": 4.779720884576237, "total_flos": 4561342841374986240, "step": 1188100 }, { "loss": 2.7525, "learning_rate": 4.4332058514586064e-07, "epoch": 4.780123184120432, "total_flos": 4561709524227993600, "step": 1188200 }, { "loss": 2.75, "learning_rate": 4.4250945974993005e-07, "epoch": 4.780525483664627, "total_flos": 4562083679998832640, "step": 1188300 }, { "loss": 2.82, "learning_rate": 4.4169833435399946e-07, "epoch": 4.7809277832088215, "total_flos": 4562477843219189760, "step": 1188400 }, { "loss": 2.78, "learning_rate": 4.4088720895806886e-07, "epoch": 4.7813300827530165, "total_flos": 4562854017262080000, "step": 1188500 }, { "loss": 2.815, "learning_rate": 4.4007608356213827e-07, "epoch": 4.781732382297211, "total_flos": 4563232756634972160, "step": 1188600 }, { "loss": 2.7575, "learning_rate": 4.392649581662078e-07, "epoch": 4.782134681841406, "total_flos": 4563617811074887680, "step": 1188700 }, { "loss": 2.7675, "learning_rate": 4.384538327702772e-07, "epoch": 4.7825369813856, "total_flos": 4564006445292072960, "step": 1188800 }, { "loss": 2.7575, "learning_rate": 4.376427073743466e-07, "epoch": 4.782939280929795, "total_flos": 4564391956498821120, "step": 1188900 }, { "loss": 2.7625, "learning_rate": 4.36831581978416e-07, "epoch": 4.783341580473989, "total_flos": 4564759749401456640, "step": 1189000 }, { "loss": 2.7525, "learning_rate": 4.360204565824854e-07, "epoch": 4.783743880018184, "total_flos": 4565140161815654400, "step": 1189100 }, { "loss": 2.7775, "learning_rate": 4.352093311865548e-07, "epoch": 4.784146179562379, "total_flos": 4565538451871232000, "step": 1189200 }, { "loss": 2.745, "learning_rate": 4.3439820579062423e-07, "epoch": 4.784548479106573, "total_flos": 4565935276023951360, "step": 1189300 }, { "loss": 2.7475, "learning_rate": 4.3358708039469364e-07, "epoch": 4.784950778650768, "total_flos": 4566325291164119040, "step": 1189400 }, { "loss": 2.7725, "learning_rate": 4.3277595499876305e-07, "epoch": 4.785353078194962, "total_flos": 4566722349011496960, "step": 1189500 }, { "loss": 2.76, "learning_rate": 4.3196482960283245e-07, "epoch": 4.785755377739157, "total_flos": 4567106654566256640, "step": 1189600 }, { "loss": 2.795, "learning_rate": 4.3115370420690186e-07, "epoch": 4.786157677283351, "total_flos": 4567490274970767360, "step": 1189700 }, { "loss": 2.7475, "learning_rate": 4.3034257881097127e-07, "epoch": 4.786559976827546, "total_flos": 4567853032815759360, "step": 1189800 }, { "loss": 2.8175, "learning_rate": 4.295314534150408e-07, "epoch": 4.7869622763717405, "total_flos": 4568228702290636800, "step": 1189900 }, { "loss": 2.7925, "learning_rate": 4.287203280191102e-07, "epoch": 4.787364575915936, "total_flos": 4568599920944517120, "step": 1190000 }, { "loss": 2.775, "learning_rate": 4.279092026231796e-07, "epoch": 4.787766875460131, "total_flos": 4568978947124490240, "step": 1190100 }, { "loss": 2.56, "learning_rate": 4.27098077227249e-07, "epoch": 4.788169175004325, "total_flos": 4569350797816197120, "step": 1190200 }, { "loss": 2.67, "learning_rate": 4.262869518313184e-07, "epoch": 4.78857147454852, "total_flos": 4569747053665996800, "step": 1190300 }, { "loss": 2.61, "learning_rate": 4.254758264353878e-07, "epoch": 4.788973774092714, "total_flos": 4570127088981995520, "step": 1190400 }, { "loss": 2.69, "learning_rate": 4.2466470103945723e-07, "epoch": 4.789376073636909, "total_flos": 4570523573215211520, "step": 1190500 }, { "loss": 2.615, "learning_rate": 4.2385357564352664e-07, "epoch": 4.789778373181103, "total_flos": 4570913455574323200, "step": 1190600 }, { "loss": 2.69, "learning_rate": 4.2304245024759604e-07, "epoch": 4.790180672725298, "total_flos": 4571295716300820480, "step": 1190700 }, { "loss": 2.67, "learning_rate": 4.2223132485166545e-07, "epoch": 4.790582972269492, "total_flos": 4571686878669312000, "step": 1190800 }, { "loss": 2.58, "learning_rate": 4.2142019945573486e-07, "epoch": 4.790985271813687, "total_flos": 4572083453193646080, "step": 1190900 }, { "loss": 2.65, "learning_rate": 4.2060907405980427e-07, "epoch": 4.791387571357881, "total_flos": 4572471635955240960, "step": 1191000 }, { "loss": 2.67, "learning_rate": 4.197979486638738e-07, "epoch": 4.791789870902076, "total_flos": 4572857922603356160, "step": 1191100 }, { "loss": 2.66, "learning_rate": 4.189868232679432e-07, "epoch": 4.79219217044627, "total_flos": 4573232290823884800, "step": 1191200 }, { "loss": 2.6, "learning_rate": 4.181756978720126e-07, "epoch": 4.7925944699904655, "total_flos": 4573605633974661120, "step": 1191300 }, { "loss": 2.69, "learning_rate": 4.17364572476082e-07, "epoch": 4.7929967695346605, "total_flos": 4573973947379036160, "step": 1191400 }, { "loss": 2.705, "learning_rate": 4.165534470801514e-07, "epoch": 4.793399069078855, "total_flos": 4574367552918958080, "step": 1191500 }, { "loss": 2.65, "learning_rate": 4.157423216842208e-07, "epoch": 4.79380136862305, "total_flos": 4574747397030236160, "step": 1191600 }, { "loss": 2.585, "learning_rate": 4.149311962882902e-07, "epoch": 4.794203668167244, "total_flos": 4575138304459100160, "step": 1191700 }, { "loss": 2.695, "learning_rate": 4.1412007089235963e-07, "epoch": 4.794605967711439, "total_flos": 4575525626799452160, "step": 1191800 }, { "loss": 2.63, "learning_rate": 4.1330894549642904e-07, "epoch": 4.795008267255633, "total_flos": 4575892792975503360, "step": 1191900 }, { "loss": 2.685, "learning_rate": 4.1249782010049845e-07, "epoch": 4.795410566799828, "total_flos": 4576268414649200640, "step": 1192000 }, { "loss": 2.625, "learning_rate": 4.1168669470456786e-07, "epoch": 4.795812866344022, "total_flos": 4576646452938117120, "step": 1192100 }, { "loss": 2.675, "learning_rate": 4.1087556930863726e-07, "epoch": 4.796215165888217, "total_flos": 4577029892760391680, "step": 1192200 }, { "loss": 2.655, "learning_rate": 4.100644439127067e-07, "epoch": 4.796617465432412, "total_flos": 4577412594319994880, "step": 1192300 }, { "loss": 2.56, "learning_rate": 4.092533185167762e-07, "epoch": 4.797019764976606, "total_flos": 4577792873953136640, "step": 1192400 }, { "loss": 2.635, "learning_rate": 4.084421931208456e-07, "epoch": 4.797422064520801, "total_flos": 4578175633936404480, "step": 1192500 }, { "loss": 2.585, "learning_rate": 4.07631067724915e-07, "epoch": 4.797824364064995, "total_flos": 4578562871296880640, "step": 1192600 }, { "loss": 2.585, "learning_rate": 4.068199423289844e-07, "epoch": 4.79822666360919, "total_flos": 4578949391639654400, "step": 1192700 }, { "loss": 2.585, "learning_rate": 4.060088169330538e-07, "epoch": 4.7986289631533845, "total_flos": 4579331832948387840, "step": 1192800 }, { "loss": 2.53, "learning_rate": 4.051976915371232e-07, "epoch": 4.7990312626975795, "total_flos": 4579723499884892160, "step": 1192900 }, { "loss": 2.62, "learning_rate": 4.0438656614119263e-07, "epoch": 4.799433562241774, "total_flos": 4580115565164564480, "step": 1193000 }, { "loss": 2.685, "learning_rate": 4.0357544074526204e-07, "epoch": 4.799835861785969, "total_flos": 4580485886218506240, "step": 1193100 }, { "loss": 2.77, "learning_rate": 4.0276431534933145e-07, "epoch": 4.800238161330164, "total_flos": 4580863786415124480, "step": 1193200 }, { "loss": 2.545, "learning_rate": 4.0195318995340085e-07, "epoch": 4.800640460874358, "total_flos": 4581246397683609600, "step": 1193300 }, { "loss": 2.685, "learning_rate": 4.0114206455747026e-07, "epoch": 4.801042760418553, "total_flos": 4581618731698360320, "step": 1193400 }, { "loss": 2.58, "learning_rate": 4.003309391615397e-07, "epoch": 4.801445059962747, "total_flos": 4582016724324372480, "step": 1193500 }, { "loss": 2.66, "learning_rate": 3.9951981376560913e-07, "epoch": 4.801847359506942, "total_flos": 4582398825713602560, "step": 1193600 }, { "loss": 2.575, "learning_rate": 3.9870868836967854e-07, "epoch": 4.802249659051136, "total_flos": 4582791618633461760, "step": 1193700 }, { "loss": 2.61, "learning_rate": 3.97897562973748e-07, "epoch": 4.802651958595331, "total_flos": 4583184852386426880, "step": 1193800 }, { "loss": 2.64, "learning_rate": 3.970864375778174e-07, "epoch": 4.803054258139525, "total_flos": 4583580253126225920, "step": 1193900 }, { "loss": 2.675, "learning_rate": 3.962753121818868e-07, "epoch": 4.80345655768372, "total_flos": 4583966061762539520, "step": 1194000 }, { "loss": 2.67, "learning_rate": 3.954641867859562e-07, "epoch": 4.803858857227914, "total_flos": 4584351790730219520, "step": 1194100 }, { "loss": 2.595, "learning_rate": 3.9465306139002563e-07, "epoch": 4.804261156772109, "total_flos": 4584735443002183680, "step": 1194200 }, { "loss": 2.645, "learning_rate": 3.9384193599409504e-07, "epoch": 4.8046634563163035, "total_flos": 4585129282236764160, "step": 1194300 }, { "loss": 2.705, "learning_rate": 3.9303081059816444e-07, "epoch": 4.8050657558604986, "total_flos": 4585510953415372800, "step": 1194400 }, { "loss": 2.585, "learning_rate": 3.9221968520223385e-07, "epoch": 4.805468055404694, "total_flos": 4585890000840314880, "step": 1194500 }, { "loss": 2.595, "learning_rate": 3.9140855980630326e-07, "epoch": 4.805870354948888, "total_flos": 4586269382873518080, "step": 1194600 }, { "loss": 2.63, "learning_rate": 3.905974344103727e-07, "epoch": 4.806272654493083, "total_flos": 4586651346170449920, "step": 1194700 }, { "loss": 2.675, "learning_rate": 3.8978630901444213e-07, "epoch": 4.806674954037277, "total_flos": 4587040187526082560, "step": 1194800 }, { "loss": 2.67, "learning_rate": 3.8897518361851153e-07, "epoch": 4.807077253581472, "total_flos": 4587437930523709440, "step": 1194900 }, { "loss": 2.615, "learning_rate": 3.8816405822258094e-07, "epoch": 4.807479553125666, "total_flos": 4587817429404241920, "step": 1195000 }, { "loss": 2.615, "learning_rate": 3.8735293282665035e-07, "epoch": 4.807881852669861, "total_flos": 4588202850319872000, "step": 1195100 }, { "loss": 2.64, "learning_rate": 3.865418074307198e-07, "epoch": 4.808284152214055, "total_flos": 4588590496646000640, "step": 1195200 }, { "loss": 2.645, "learning_rate": 3.857306820347892e-07, "epoch": 4.80868645175825, "total_flos": 4588963324606279680, "step": 1195300 }, { "loss": 2.66, "learning_rate": 3.849195566388586e-07, "epoch": 4.809088751302445, "total_flos": 4589345919941038080, "step": 1195400 }, { "loss": 2.61, "learning_rate": 3.8410843124292803e-07, "epoch": 4.809491050846639, "total_flos": 4589729370385797120, "step": 1195500 }, { "loss": 2.565, "learning_rate": 3.8329730584699744e-07, "epoch": 4.809893350390834, "total_flos": 4590118753488138240, "step": 1195600 }, { "loss": 2.64, "learning_rate": 3.8248618045106685e-07, "epoch": 4.8102956499350285, "total_flos": 4590498900340224000, "step": 1195700 }, { "loss": 2.585, "learning_rate": 3.8167505505513626e-07, "epoch": 4.8106979494792235, "total_flos": 4590895453619589120, "step": 1195800 }, { "loss": 2.605, "learning_rate": 3.808639296592057e-07, "epoch": 4.811100249023418, "total_flos": 4591288469611622400, "step": 1195900 }, { "loss": 2.575, "learning_rate": 3.800528042632751e-07, "epoch": 4.811502548567613, "total_flos": 4591667145249607680, "step": 1196000 }, { "loss": 2.63, "learning_rate": 3.7924167886734453e-07, "epoch": 4.811904848111807, "total_flos": 4592043005929205760, "step": 1196100 }, { "loss": 2.565, "learning_rate": 3.7843055347141394e-07, "epoch": 4.812307147656002, "total_flos": 4592433472524963840, "step": 1196200 }, { "loss": 2.71, "learning_rate": 3.7761942807548335e-07, "epoch": 4.812709447200197, "total_flos": 4592812774889533440, "step": 1196300 }, { "loss": 2.625, "learning_rate": 3.7680830267955275e-07, "epoch": 4.813111746744391, "total_flos": 4593193304151060480, "step": 1196400 }, { "loss": 2.62, "learning_rate": 3.7599717728362216e-07, "epoch": 4.813514046288586, "total_flos": 4593563827032207360, "step": 1196500 }, { "loss": 2.63, "learning_rate": 3.751860518876916e-07, "epoch": 4.81391634583278, "total_flos": 4593942449557770240, "step": 1196600 }, { "loss": 2.61, "learning_rate": 3.7437492649176103e-07, "epoch": 4.814318645376975, "total_flos": 4594314518010408960, "step": 1196700 }, { "loss": 2.585, "learning_rate": 3.7356380109583044e-07, "epoch": 4.814720944921169, "total_flos": 4594694882623426560, "step": 1196800 }, { "loss": 2.615, "learning_rate": 3.7275267569989985e-07, "epoch": 4.815123244465364, "total_flos": 4595095844233850880, "step": 1196900 }, { "loss": 2.645, "learning_rate": 3.719415503039693e-07, "epoch": 4.815525544009558, "total_flos": 4595458219669401600, "step": 1197000 }, { "loss": 2.65, "learning_rate": 3.711304249080387e-07, "epoch": 4.815927843553753, "total_flos": 4595842567714099200, "step": 1197100 }, { "loss": 2.645, "learning_rate": 3.703192995121081e-07, "epoch": 4.8163301430979475, "total_flos": 4596208395457105920, "step": 1197200 }, { "loss": 2.715, "learning_rate": 3.6950817411617753e-07, "epoch": 4.8167324426421425, "total_flos": 4596598904542801920, "step": 1197300 }, { "loss": 2.54, "learning_rate": 3.6869704872024694e-07, "epoch": 4.817134742186337, "total_flos": 4596986959834583040, "step": 1197400 }, { "loss": 2.645, "learning_rate": 3.6788592332431634e-07, "epoch": 4.817537041730532, "total_flos": 4597369119647477760, "step": 1197500 }, { "loss": 2.675, "learning_rate": 3.6707479792838575e-07, "epoch": 4.817939341274727, "total_flos": 4597746690547077120, "step": 1197600 }, { "loss": 2.625, "learning_rate": 3.6626367253245516e-07, "epoch": 4.818341640818921, "total_flos": 4598127203874877440, "step": 1197700 }, { "loss": 2.58, "learning_rate": 3.6545254713652457e-07, "epoch": 4.818743940363116, "total_flos": 4598502921150935040, "step": 1197800 }, { "loss": 2.63, "learning_rate": 3.64641421740594e-07, "epoch": 4.81914623990731, "total_flos": 4598864717661081600, "step": 1197900 }, { "loss": 2.635, "learning_rate": 3.6383029634466343e-07, "epoch": 4.819548539451505, "total_flos": 4599236791424962560, "step": 1198000 }, { "loss": 2.625, "learning_rate": 3.6301917094873284e-07, "epoch": 4.819950838995699, "total_flos": 4599623136496742400, "step": 1198100 }, { "loss": 2.62, "learning_rate": 3.622080455528023e-07, "epoch": 4.820353138539894, "total_flos": 4599995550180126720, "step": 1198200 }, { "loss": 2.66, "learning_rate": 3.613969201568717e-07, "epoch": 4.820755438084088, "total_flos": 4600380822380974080, "step": 1198300 }, { "loss": 2.61, "learning_rate": 3.605857947609411e-07, "epoch": 4.821157737628283, "total_flos": 4600765584702566400, "step": 1198400 }, { "loss": 2.605, "learning_rate": 3.597746693650105e-07, "epoch": 4.821560037172478, "total_flos": 4601138375484149760, "step": 1198500 }, { "loss": 2.635, "learning_rate": 3.5896354396907993e-07, "epoch": 4.821962336716672, "total_flos": 4601523445857792000, "step": 1198600 }, { "loss": 2.64, "learning_rate": 3.5815241857314934e-07, "epoch": 4.822364636260867, "total_flos": 4601923371775979520, "step": 1198700 }, { "loss": 2.7, "learning_rate": 3.5734129317721875e-07, "epoch": 4.8227669358050616, "total_flos": 4602319951611555840, "step": 1198800 }, { "loss": 2.655, "learning_rate": 3.5653016778128816e-07, "epoch": 4.823169235349257, "total_flos": 4602710381028618240, "step": 1198900 }, { "loss": 2.61, "learning_rate": 3.5571904238535756e-07, "epoch": 4.823571534893451, "total_flos": 4603099827865866240, "step": 1199000 }, { "loss": 2.585, "learning_rate": 3.5490791698942697e-07, "epoch": 4.823973834437646, "total_flos": 4603492907592806400, "step": 1199100 }, { "loss": 2.62, "learning_rate": 3.540967915934964e-07, "epoch": 4.82437613398184, "total_flos": 4603884760422789120, "step": 1199200 }, { "loss": 2.585, "learning_rate": 3.532856661975658e-07, "epoch": 4.824778433526035, "total_flos": 4604278211936686080, "step": 1199300 }, { "loss": 2.585, "learning_rate": 3.524745408016353e-07, "epoch": 4.82518073307023, "total_flos": 4604671812165365760, "step": 1199400 }, { "loss": 2.61, "learning_rate": 3.516634154057047e-07, "epoch": 4.825583032614424, "total_flos": 4605047518818938880, "step": 1199500 }, { "loss": 2.555, "learning_rate": 3.508522900097741e-07, "epoch": 4.825985332158619, "total_flos": 4605423719418040320, "step": 1199600 }, { "loss": 2.64, "learning_rate": 3.500411646138435e-07, "epoch": 4.826387631702813, "total_flos": 4605799096774594560, "step": 1199700 }, { "loss": 2.715, "learning_rate": 3.4923003921791293e-07, "epoch": 4.826789931247008, "total_flos": 4606176274642268160, "step": 1199800 }, { "loss": 2.7, "learning_rate": 3.4841891382198234e-07, "epoch": 4.827192230791202, "total_flos": 4606551142119567360, "step": 1199900 }, { "loss": 2.61, "learning_rate": 3.4760778842605175e-07, "epoch": 4.827594530335397, "total_flos": 4606924772077424640, "step": 1200000 }, { "loss": 2.68, "learning_rate": 3.4679666303012115e-07, "epoch": 4.8279968298795914, "total_flos": 4607292942078259200, "step": 1200100 }, { "loss": 2.72, "learning_rate": 3.4598553763419056e-07, "epoch": 4.8283991294237865, "total_flos": 4607670746672517120, "step": 1200200 }, { "loss": 2.64, "learning_rate": 3.4517441223825997e-07, "epoch": 4.828801428967981, "total_flos": 4608050272109260800, "step": 1200300 }, { "loss": 2.655, "learning_rate": 3.443632868423294e-07, "epoch": 4.829203728512176, "total_flos": 4608400128946851840, "step": 1200400 }, { "loss": 2.555, "learning_rate": 3.435521614463988e-07, "epoch": 4.82960602805637, "total_flos": 4608790866415964160, "step": 1200500 }, { "loss": 2.545, "learning_rate": 3.427410360504683e-07, "epoch": 4.830008327600565, "total_flos": 4609187324092968960, "step": 1200600 }, { "loss": 2.645, "learning_rate": 3.419299106545377e-07, "epoch": 4.83041062714476, "total_flos": 4609584780283514880, "step": 1200700 }, { "loss": 2.615, "learning_rate": 3.411187852586071e-07, "epoch": 4.830812926688954, "total_flos": 4609964502236221440, "step": 1200800 }, { "loss": 2.565, "learning_rate": 3.403076598626765e-07, "epoch": 4.831215226233149, "total_flos": 4610358851350056960, "step": 1200900 }, { "loss": 2.69, "learning_rate": 3.3949653446674593e-07, "epoch": 4.831617525777343, "total_flos": 4610744872436060160, "step": 1201000 }, { "loss": 2.585, "learning_rate": 3.3868540907081534e-07, "epoch": 4.832019825321538, "total_flos": 4611113865679441920, "step": 1201100 }, { "loss": 2.615, "learning_rate": 3.3787428367488474e-07, "epoch": 4.832422124865732, "total_flos": 4611477707017850880, "step": 1201200 }, { "loss": 2.655, "learning_rate": 3.3706315827895415e-07, "epoch": 4.832824424409927, "total_flos": 4611861922281492480, "step": 1201300 }, { "loss": 2.625, "learning_rate": 3.3625203288302356e-07, "epoch": 4.833226723954121, "total_flos": 4612252420744704000, "step": 1201400 }, { "loss": 2.66, "learning_rate": 3.3544090748709297e-07, "epoch": 4.833629023498316, "total_flos": 4612633003118653440, "step": 1201500 }, { "loss": 2.635, "learning_rate": 3.346297820911624e-07, "epoch": 4.834031323042511, "total_flos": 4613034527720755200, "step": 1201600 }, { "loss": 2.685, "learning_rate": 3.338186566952318e-07, "epoch": 4.8344336225867055, "total_flos": 4613422891064586240, "step": 1201700 }, { "loss": 2.73, "learning_rate": 3.330075312993013e-07, "epoch": 4.8348359221309005, "total_flos": 4613806654872637440, "step": 1201800 }, { "loss": 2.57, "learning_rate": 3.321964059033707e-07, "epoch": 4.835238221675095, "total_flos": 4614207441212067840, "step": 1201900 }, { "loss": 2.625, "learning_rate": 3.313852805074401e-07, "epoch": 4.83564052121929, "total_flos": 4614589686004838400, "step": 1202000 }, { "loss": 2.575, "learning_rate": 3.305741551115095e-07, "epoch": 4.836042820763484, "total_flos": 4614974543928791040, "step": 1202100 }, { "loss": 2.645, "learning_rate": 3.297630297155789e-07, "epoch": 4.836445120307679, "total_flos": 4615350277138575360, "step": 1202200 }, { "loss": 2.585, "learning_rate": 3.2895190431964833e-07, "epoch": 4.836847419851873, "total_flos": 4615714431840276480, "step": 1202300 }, { "loss": 2.625, "learning_rate": 3.2814077892371774e-07, "epoch": 4.837249719396068, "total_flos": 4616099321631682560, "step": 1202400 }, { "loss": 2.66, "learning_rate": 3.2732965352778715e-07, "epoch": 4.837652018940263, "total_flos": 4616482602116689920, "step": 1202500 }, { "loss": 2.58, "learning_rate": 3.2651852813185656e-07, "epoch": 4.838054318484457, "total_flos": 4616866541195735040, "step": 1202600 }, { "loss": 2.585, "learning_rate": 3.2570740273592596e-07, "epoch": 4.838456618028652, "total_flos": 4617258553362984960, "step": 1202700 }, { "loss": 2.62, "learning_rate": 3.2489627733999537e-07, "epoch": 4.838858917572846, "total_flos": 4617642210946191360, "step": 1202800 }, { "loss": 2.6, "learning_rate": 3.240851519440648e-07, "epoch": 4.839261217117041, "total_flos": 4618037903804313600, "step": 1202900 }, { "loss": 2.625, "learning_rate": 3.2327402654813424e-07, "epoch": 4.839663516661235, "total_flos": 4618414577103974400, "step": 1203000 }, { "loss": 2.63, "learning_rate": 3.224629011522037e-07, "epoch": 4.84006581620543, "total_flos": 4618802138450227200, "step": 1203100 }, { "loss": 2.63, "learning_rate": 3.216517757562731e-07, "epoch": 4.8404681157496245, "total_flos": 4619189710418964480, "step": 1203200 }, { "loss": 2.645, "learning_rate": 3.208406503603425e-07, "epoch": 4.84087041529382, "total_flos": 4619570988565647360, "step": 1203300 }, { "loss": 2.6, "learning_rate": 3.200295249644119e-07, "epoch": 4.841272714838015, "total_flos": 4619956653798420480, "step": 1203400 }, { "loss": 2.7, "learning_rate": 3.1921839956848133e-07, "epoch": 4.841675014382209, "total_flos": 4620343726510387200, "step": 1203500 }, { "loss": 2.665, "learning_rate": 3.1840727417255074e-07, "epoch": 4.842077313926403, "total_flos": 4620742319306772480, "step": 1203600 }, { "loss": 2.615, "learning_rate": 3.1759614877662015e-07, "epoch": 4.842479613470598, "total_flos": 4621135797376880640, "step": 1203700 }, { "loss": 2.555, "learning_rate": 3.1678502338068955e-07, "epoch": 4.842881913014793, "total_flos": 4621526529534750720, "step": 1203800 }, { "loss": 2.595, "learning_rate": 3.1597389798475896e-07, "epoch": 4.843284212558987, "total_flos": 4621916741190881280, "step": 1203900 }, { "loss": 2.695, "learning_rate": 3.1516277258882837e-07, "epoch": 4.843686512103182, "total_flos": 4622303240288686080, "step": 1204000 }, { "loss": 2.685, "learning_rate": 3.1435164719289783e-07, "epoch": 4.844088811647376, "total_flos": 4622677003027599360, "step": 1204100 }, { "loss": 2.565, "learning_rate": 3.1354052179696724e-07, "epoch": 4.844491111191571, "total_flos": 4623058573292605440, "step": 1204200 }, { "loss": 2.565, "learning_rate": 3.1272939640103664e-07, "epoch": 4.844893410735765, "total_flos": 4623437280798044160, "step": 1204300 }, { "loss": 2.665, "learning_rate": 3.1191827100510605e-07, "epoch": 4.84529571027996, "total_flos": 4623825930948956160, "step": 1204400 }, { "loss": 2.69, "learning_rate": 3.111071456091755e-07, "epoch": 4.845698009824154, "total_flos": 4624202184660480000, "step": 1204500 }, { "loss": 2.675, "learning_rate": 3.102960202132449e-07, "epoch": 4.8461003093683495, "total_flos": 4624572787210260480, "step": 1204600 }, { "loss": 2.615, "learning_rate": 3.0948489481731433e-07, "epoch": 4.8465026089125445, "total_flos": 4624961575453470720, "step": 1204700 }, { "loss": 2.62, "learning_rate": 3.0867376942138373e-07, "epoch": 4.846904908456739, "total_flos": 4625335826826670080, "step": 1204800 }, { "loss": 2.565, "learning_rate": 3.0786264402545314e-07, "epoch": 4.847307208000934, "total_flos": 4625730053781934080, "step": 1204900 }, { "loss": 2.7, "learning_rate": 3.0705151862952255e-07, "epoch": 4.847709507545128, "total_flos": 4626115145400545280, "step": 1205000 }, { "loss": 2.62, "learning_rate": 3.0624039323359196e-07, "epoch": 4.848111807089323, "total_flos": 4626509064303759360, "step": 1205100 }, { "loss": 2.6, "learning_rate": 3.054292678376614e-07, "epoch": 4.848514106633517, "total_flos": 4626878195639439360, "step": 1205200 }, { "loss": 2.545, "learning_rate": 3.046181424417308e-07, "epoch": 4.848916406177712, "total_flos": 4627273665425387520, "step": 1205300 }, { "loss": 2.665, "learning_rate": 3.0380701704580023e-07, "epoch": 4.849318705721906, "total_flos": 4627638903620505600, "step": 1205400 }, { "loss": 2.665, "learning_rate": 3.0299589164986964e-07, "epoch": 4.849721005266101, "total_flos": 4628030018187816960, "step": 1205500 }, { "loss": 2.64, "learning_rate": 3.0218476625393905e-07, "epoch": 4.850123304810296, "total_flos": 4628429274889482240, "step": 1205600 }, { "loss": 2.645, "learning_rate": 3.0137364085800846e-07, "epoch": 4.85052560435449, "total_flos": 4628802750821314560, "step": 1205700 }, { "loss": 2.635, "learning_rate": 3.0056251546207786e-07, "epoch": 4.850927903898685, "total_flos": 4629209682268016640, "step": 1205800 }, { "loss": 2.655, "learning_rate": 2.997513900661473e-07, "epoch": 4.851330203442879, "total_flos": 4629595554639237120, "step": 1205900 }, { "loss": 2.575, "learning_rate": 2.9894026467021673e-07, "epoch": 4.851732502987074, "total_flos": 4629990801353011200, "step": 1206000 }, { "loss": 2.665, "learning_rate": 2.9812913927428614e-07, "epoch": 4.8521348025312685, "total_flos": 4630371351859507200, "step": 1206100 }, { "loss": 2.615, "learning_rate": 2.9731801387835555e-07, "epoch": 4.8525371020754635, "total_flos": 4630748195118919680, "step": 1206200 }, { "loss": 2.705, "learning_rate": 2.9650688848242496e-07, "epoch": 4.852939401619658, "total_flos": 4631108461991301120, "step": 1206300 }, { "loss": 2.64, "learning_rate": 2.9569576308649436e-07, "epoch": 4.853341701163853, "total_flos": 4631484025241333760, "step": 1206400 }, { "loss": 2.66, "learning_rate": 2.9488463769056377e-07, "epoch": 4.853744000708048, "total_flos": 4631885278970081280, "step": 1206500 }, { "loss": 2.695, "learning_rate": 2.9407351229463323e-07, "epoch": 4.854146300252242, "total_flos": 4632287292206469120, "step": 1206600 }, { "loss": 2.58, "learning_rate": 2.9326238689870264e-07, "epoch": 4.854548599796436, "total_flos": 4632678289926451200, "step": 1206700 }, { "loss": 2.53, "learning_rate": 2.9245126150277205e-07, "epoch": 4.854950899340631, "total_flos": 4633057337351393280, "step": 1206800 }, { "loss": 2.66, "learning_rate": 2.9164013610684145e-07, "epoch": 4.855353198884826, "total_flos": 4633435163190620160, "step": 1206900 }, { "loss": 2.6, "learning_rate": 2.9082901071091086e-07, "epoch": 4.85575549842902, "total_flos": 4633815256930283520, "step": 1207000 }, { "loss": 2.57, "learning_rate": 2.9001788531498027e-07, "epoch": 4.856157797973215, "total_flos": 4634197480478085120, "step": 1207100 }, { "loss": 2.58, "learning_rate": 2.892067599190497e-07, "epoch": 4.856560097517409, "total_flos": 4634588658780303360, "step": 1207200 }, { "loss": 2.625, "learning_rate": 2.8839563452311914e-07, "epoch": 4.856962397061604, "total_flos": 4634974387747983360, "step": 1207300 }, { "loss": 2.71, "learning_rate": 2.8758450912718854e-07, "epoch": 4.857364696605798, "total_flos": 4635349223357829120, "step": 1207400 }, { "loss": 2.655, "learning_rate": 2.8677338373125795e-07, "epoch": 4.857766996149993, "total_flos": 4635738463056629760, "step": 1207500 }, { "loss": 2.63, "learning_rate": 2.8596225833532736e-07, "epoch": 4.8581692956941875, "total_flos": 4636120867186667520, "step": 1207600 }, { "loss": 2.67, "learning_rate": 2.8515113293939677e-07, "epoch": 4.858571595238383, "total_flos": 4636491714053591040, "step": 1207700 }, { "loss": 2.595, "learning_rate": 2.843400075434662e-07, "epoch": 4.858973894782578, "total_flos": 4636884294523760640, "step": 1207800 }, { "loss": 2.695, "learning_rate": 2.835288821475356e-07, "epoch": 4.859376194326772, "total_flos": 4637272408239206400, "step": 1207900 }, { "loss": 2.65, "learning_rate": 2.8271775675160504e-07, "epoch": 4.859778493870967, "total_flos": 4637661164614963200, "step": 1208000 }, { "loss": 2.66, "learning_rate": 2.8190663135567445e-07, "epoch": 4.860180793415161, "total_flos": 4638063347811102720, "step": 1208100 }, { "loss": 2.595, "learning_rate": 2.8109550595974386e-07, "epoch": 4.860583092959356, "total_flos": 4638442841380392960, "step": 1208200 }, { "loss": 2.53, "learning_rate": 2.8028438056381327e-07, "epoch": 4.86098539250355, "total_flos": 4638835554631618560, "step": 1208300 }, { "loss": 2.655, "learning_rate": 2.794732551678827e-07, "epoch": 4.861387692047745, "total_flos": 4639207639017984000, "step": 1208400 }, { "loss": 2.67, "learning_rate": 2.786621297719521e-07, "epoch": 4.861789991591939, "total_flos": 4639595943938150400, "step": 1208500 }, { "loss": 2.585, "learning_rate": 2.7785100437602154e-07, "epoch": 4.862192291136134, "total_flos": 4639988710301798400, "step": 1208600 }, { "loss": 2.61, "learning_rate": 2.7703987898009095e-07, "epoch": 4.862594590680329, "total_flos": 4640376712481157120, "step": 1208700 }, { "loss": 2.65, "learning_rate": 2.7622875358416036e-07, "epoch": 4.862996890224523, "total_flos": 4640760874632376320, "step": 1208800 }, { "loss": 2.6, "learning_rate": 2.7541762818822976e-07, "epoch": 4.863399189768718, "total_flos": 4641150820726394880, "step": 1208900 }, { "loss": 2.645, "learning_rate": 2.7460650279229917e-07, "epoch": 4.8638014893129125, "total_flos": 4641535078479974400, "step": 1209000 }, { "loss": 2.65, "learning_rate": 2.737953773963686e-07, "epoch": 4.8642037888571075, "total_flos": 4641906190909009920, "step": 1209100 }, { "loss": 2.65, "learning_rate": 2.7298425200043804e-07, "epoch": 4.864606088401302, "total_flos": 4642285243645194240, "step": 1209200 }, { "loss": 2.685, "learning_rate": 2.7217312660450745e-07, "epoch": 4.865008387945497, "total_flos": 4642654953906278400, "step": 1209300 }, { "loss": 2.545, "learning_rate": 2.7136200120857686e-07, "epoch": 4.865410687489691, "total_flos": 4643035074202152960, "step": 1209400 }, { "loss": 2.615, "learning_rate": 2.7055087581264626e-07, "epoch": 4.865812987033886, "total_flos": 4643381223792660480, "step": 1209500 }, { "loss": 2.645, "learning_rate": 2.6973975041671567e-07, "epoch": 4.866215286578081, "total_flos": 4643762714389032960, "step": 1209600 }, { "loss": 2.61, "learning_rate": 2.689286250207851e-07, "epoch": 4.866617586122275, "total_flos": 4644135367078318080, "step": 1209700 }, { "loss": 2.615, "learning_rate": 2.6811749962485454e-07, "epoch": 4.86701988566647, "total_flos": 4644513304453632000, "step": 1209800 }, { "loss": 2.6, "learning_rate": 2.6730637422892395e-07, "epoch": 4.867422185210664, "total_flos": 4644889871528448000, "step": 1209900 }, { "loss": 2.57, "learning_rate": 2.6649524883299335e-07, "epoch": 4.867824484754859, "total_flos": 4645272790848983040, "step": 1210000 }, { "loss": 2.635, "learning_rate": 2.6568412343706276e-07, "epoch": 4.868226784299053, "total_flos": 4645662885657784320, "step": 1210100 }, { "loss": 2.6, "learning_rate": 2.6487299804113217e-07, "epoch": 4.868629083843248, "total_flos": 4646042464206950400, "step": 1210200 }, { "loss": 2.595, "learning_rate": 2.640618726452016e-07, "epoch": 4.869031383387442, "total_flos": 4646407187211571200, "step": 1210300 }, { "loss": 2.615, "learning_rate": 2.6325074724927104e-07, "epoch": 4.869433682931637, "total_flos": 4646793500415897600, "step": 1210400 }, { "loss": 2.64, "learning_rate": 2.6243962185334045e-07, "epoch": 4.8698359824758315, "total_flos": 4647183951077928960, "step": 1210500 }, { "loss": 2.6, "learning_rate": 2.6162849645740985e-07, "epoch": 4.8702382820200265, "total_flos": 4647557697883115520, "step": 1210600 }, { "loss": 2.615, "learning_rate": 2.6081737106147926e-07, "epoch": 4.870640581564221, "total_flos": 4647955132828692480, "step": 1210700 }, { "loss": 2.62, "learning_rate": 2.6000624566554867e-07, "epoch": 4.871042881108416, "total_flos": 4648335460263014400, "step": 1210800 }, { "loss": 2.53, "learning_rate": 2.591951202696181e-07, "epoch": 4.871445180652611, "total_flos": 4648728858664488960, "step": 1210900 }, { "loss": 2.66, "learning_rate": 2.5838399487368754e-07, "epoch": 4.871847480196805, "total_flos": 4649120026344222720, "step": 1211000 }, { "loss": 2.625, "learning_rate": 2.5757286947775694e-07, "epoch": 4.872249779741, "total_flos": 4649510636343521280, "step": 1211100 }, { "loss": 2.63, "learning_rate": 2.5676174408182635e-07, "epoch": 4.872652079285194, "total_flos": 4649899127157166080, "step": 1211200 }, { "loss": 2.555, "learning_rate": 2.5595061868589576e-07, "epoch": 4.873054378829389, "total_flos": 4650282986567577600, "step": 1211300 }, { "loss": 2.69, "learning_rate": 2.5513949328996517e-07, "epoch": 4.873456678373583, "total_flos": 4650685106028810240, "step": 1211400 }, { "loss": 2.665, "learning_rate": 2.543283678940346e-07, "epoch": 4.873858977917778, "total_flos": 4651054311721881600, "step": 1211500 }, { "loss": 2.565, "learning_rate": 2.5351724249810404e-07, "epoch": 4.874261277461972, "total_flos": 4651454285441249280, "step": 1211600 }, { "loss": 2.605, "learning_rate": 2.5270611710217344e-07, "epoch": 4.874663577006167, "total_flos": 4651831898830786560, "step": 1211700 }, { "loss": 2.625, "learning_rate": 2.5189499170624285e-07, "epoch": 4.875065876550362, "total_flos": 4652215343964303360, "step": 1211800 }, { "loss": 2.685, "learning_rate": 2.5108386631031226e-07, "epoch": 4.875468176094556, "total_flos": 4652603447057264640, "step": 1211900 }, { "loss": 2.605, "learning_rate": 2.5027274091438167e-07, "epoch": 4.875870475638751, "total_flos": 4652997020729733120, "step": 1212000 }, { "loss": 2.645, "learning_rate": 2.494616155184511e-07, "epoch": 4.8762727751829456, "total_flos": 4653374166729953280, "step": 1212100 }, { "loss": 2.635, "learning_rate": 2.4865049012252053e-07, "epoch": 4.876675074727141, "total_flos": 4653776695156838400, "step": 1212200 }, { "loss": 2.58, "learning_rate": 2.4783936472658994e-07, "epoch": 4.877077374271335, "total_flos": 4654167347646074880, "step": 1212300 }, { "loss": 2.59, "learning_rate": 2.4702823933065935e-07, "epoch": 4.87747967381553, "total_flos": 4654546055151513600, "step": 1212400 }, { "loss": 2.54, "learning_rate": 2.4621711393472876e-07, "epoch": 4.877881973359724, "total_flos": 4654925341582356480, "step": 1212500 }, { "loss": 2.565, "learning_rate": 2.4540598853879816e-07, "epoch": 4.878284272903919, "total_flos": 4655304080955248640, "step": 1212600 }, { "loss": 2.65, "learning_rate": 2.445948631428676e-07, "epoch": 4.878686572448114, "total_flos": 4655697500601692160, "step": 1212700 }, { "loss": 2.62, "learning_rate": 2.4378373774693703e-07, "epoch": 4.879088871992308, "total_flos": 4656075485778186240, "step": 1212800 }, { "loss": 2.65, "learning_rate": 2.4297261235100644e-07, "epoch": 4.879491171536503, "total_flos": 4656445732474736640, "step": 1212900 }, { "loss": 2.63, "learning_rate": 2.4216148695507585e-07, "epoch": 4.879893471080697, "total_flos": 4656825279156449280, "step": 1213000 }, { "loss": 2.555, "learning_rate": 2.4135036155914526e-07, "epoch": 4.880295770624892, "total_flos": 4657229055725260800, "step": 1213100 }, { "loss": 2.57, "learning_rate": 2.4053923616321466e-07, "epoch": 4.880698070169086, "total_flos": 4657631801913077760, "step": 1213200 }, { "loss": 2.565, "learning_rate": 2.397281107672841e-07, "epoch": 4.881100369713281, "total_flos": 4658020946009518080, "step": 1213300 }, { "loss": 2.68, "learning_rate": 2.3891698537135353e-07, "epoch": 4.8815026692574754, "total_flos": 4658413011289190400, "step": 1213400 }, { "loss": 2.635, "learning_rate": 2.381058599754229e-07, "epoch": 4.8819049688016705, "total_flos": 4658795351684321280, "step": 1213500 }, { "loss": 2.595, "learning_rate": 2.3729473457949235e-07, "epoch": 4.882307268345865, "total_flos": 4659183969967779840, "step": 1213600 }, { "loss": 2.625, "learning_rate": 2.3648360918356175e-07, "epoch": 4.88270956789006, "total_flos": 4659557653038059520, "step": 1213700 }, { "loss": 2.525, "learning_rate": 2.3567248378763116e-07, "epoch": 4.883111867434254, "total_flos": 4659964048049295360, "step": 1213800 }, { "loss": 2.675, "learning_rate": 2.348613583917006e-07, "epoch": 4.883514166978449, "total_flos": 4660359894933442560, "step": 1213900 }, { "loss": 2.69, "learning_rate": 2.3405023299577e-07, "epoch": 4.883916466522644, "total_flos": 4660747312876154880, "step": 1214000 }, { "loss": 2.59, "learning_rate": 2.332391075998394e-07, "epoch": 4.884318766066838, "total_flos": 4661126992338923520, "step": 1214100 }, { "loss": 2.6, "learning_rate": 2.3242798220390882e-07, "epoch": 4.884721065611033, "total_flos": 4661505025316597760, "step": 1214200 }, { "loss": 2.645, "learning_rate": 2.3161685680797825e-07, "epoch": 4.885123365155227, "total_flos": 4661867926565130240, "step": 1214300 }, { "loss": 2.565, "learning_rate": 2.3080573141204766e-07, "epoch": 4.885525664699422, "total_flos": 4662246453488332800, "step": 1214400 }, { "loss": 2.645, "learning_rate": 2.299946060161171e-07, "epoch": 4.885927964243616, "total_flos": 4662639809399869440, "step": 1214500 }, { "loss": 2.59, "learning_rate": 2.291834806201865e-07, "epoch": 4.886330263787811, "total_flos": 4663028778225315840, "step": 1214600 }, { "loss": 2.68, "learning_rate": 2.283723552242559e-07, "epoch": 4.886732563332005, "total_flos": 4663402482540564480, "step": 1214700 }, { "loss": 2.635, "learning_rate": 2.2756122982832532e-07, "epoch": 4.8871348628762, "total_flos": 4663789953595699200, "step": 1214800 }, { "loss": 2.58, "learning_rate": 2.2675010443239472e-07, "epoch": 4.887537162420395, "total_flos": 4664190771802583040, "step": 1214900 }, { "loss": 2.615, "learning_rate": 2.2593897903646416e-07, "epoch": 4.8879394619645895, "total_flos": 4664591329758597120, "step": 1215000 }, { "loss": 2.615, "learning_rate": 2.251278536405336e-07, "epoch": 4.8883417615087845, "total_flos": 4664985551402618880, "step": 1215100 }, { "loss": 2.685, "learning_rate": 2.24316728244603e-07, "epoch": 4.888744061052979, "total_flos": 4665368242339737600, "step": 1215200 }, { "loss": 2.685, "learning_rate": 2.235056028486724e-07, "epoch": 4.889146360597174, "total_flos": 4665746652415610880, "step": 1215300 }, { "loss": 2.66, "learning_rate": 2.2269447745274182e-07, "epoch": 4.889548660141368, "total_flos": 4666130729586954240, "step": 1215400 }, { "loss": 2.675, "learning_rate": 2.2188335205681122e-07, "epoch": 4.889950959685563, "total_flos": 4666502293471580160, "step": 1215500 }, { "loss": 2.645, "learning_rate": 2.2107222666088063e-07, "epoch": 4.890353259229757, "total_flos": 4666874292878069760, "step": 1215600 }, { "loss": 2.58, "learning_rate": 2.202611012649501e-07, "epoch": 4.890755558773952, "total_flos": 4667248013127045120, "step": 1215700 }, { "loss": 2.57, "learning_rate": 2.194499758690195e-07, "epoch": 4.891157858318147, "total_flos": 4667633014454538240, "step": 1215800 }, { "loss": 2.53, "learning_rate": 2.186388504730889e-07, "epoch": 4.891560157862341, "total_flos": 4668017819266068480, "step": 1215900 }, { "loss": 2.635, "learning_rate": 2.1782772507715831e-07, "epoch": 4.891962457406536, "total_flos": 4668405736465551360, "step": 1216000 }, { "loss": 2.53, "learning_rate": 2.1701659968122772e-07, "epoch": 4.89236475695073, "total_flos": 4668793770512363520, "step": 1216100 }, { "loss": 2.635, "learning_rate": 2.1620547428529713e-07, "epoch": 4.892767056494925, "total_flos": 4669177135977246720, "step": 1216200 }, { "loss": 2.58, "learning_rate": 2.153943488893666e-07, "epoch": 4.893169356039119, "total_flos": 4669565244381450240, "step": 1216300 }, { "loss": 2.65, "learning_rate": 2.14583223493436e-07, "epoch": 4.893571655583314, "total_flos": 4669942310713036800, "step": 1216400 }, { "loss": 2.585, "learning_rate": 2.137720980975054e-07, "epoch": 4.8939739551275085, "total_flos": 4670326881829908480, "step": 1216500 }, { "loss": 2.64, "learning_rate": 2.129609727015748e-07, "epoch": 4.894376254671704, "total_flos": 4670709328449884160, "step": 1216600 }, { "loss": 2.66, "learning_rate": 2.1214984730564422e-07, "epoch": 4.894778554215898, "total_flos": 4671071656084254720, "step": 1216700 }, { "loss": 2.66, "learning_rate": 2.1133872190971363e-07, "epoch": 4.895180853760093, "total_flos": 4671433330435829760, "step": 1216800 }, { "loss": 2.55, "learning_rate": 2.105275965137831e-07, "epoch": 4.895583153304287, "total_flos": 4671824689320284160, "step": 1216900 }, { "loss": 2.56, "learning_rate": 2.097164711178525e-07, "epoch": 4.895985452848482, "total_flos": 4672214412342128640, "step": 1217000 }, { "loss": 2.625, "learning_rate": 2.089053457219219e-07, "epoch": 4.896387752392677, "total_flos": 4672595897627258880, "step": 1217100 }, { "loss": 2.495, "learning_rate": 2.080942203259913e-07, "epoch": 4.896790051936871, "total_flos": 4672998102068367360, "step": 1217200 }, { "loss": 2.58, "learning_rate": 2.0728309493006072e-07, "epoch": 4.897192351481066, "total_flos": 4673390066434437120, "step": 1217300 }, { "loss": 2.595, "learning_rate": 2.0647196953413013e-07, "epoch": 4.89759465102526, "total_flos": 4673782535368519680, "step": 1217400 }, { "loss": 2.58, "learning_rate": 2.056608441381996e-07, "epoch": 4.897996950569455, "total_flos": 4674175296420925440, "step": 1217500 }, { "loss": 2.61, "learning_rate": 2.04849718742269e-07, "epoch": 4.898399250113649, "total_flos": 4674556956977049600, "step": 1217600 }, { "loss": 2.69, "learning_rate": 2.040385933463384e-07, "epoch": 4.898801549657844, "total_flos": 4674933768369008640, "step": 1217700 }, { "loss": 2.65, "learning_rate": 2.032274679504078e-07, "epoch": 4.899203849202038, "total_flos": 4675307743557611520, "step": 1217800 }, { "loss": 2.595, "learning_rate": 2.0241634255447722e-07, "epoch": 4.8996061487462335, "total_flos": 4675706017679462400, "step": 1217900 }, { "loss": 2.585, "learning_rate": 2.0160521715854663e-07, "epoch": 4.9000084482904285, "total_flos": 4676091486396272640, "step": 1218000 }, { "loss": 2.64, "learning_rate": 2.0079409176261606e-07, "epoch": 4.900410747834623, "total_flos": 4676458376387727360, "step": 1218100 }, { "loss": 2.575, "learning_rate": 1.999829663666855e-07, "epoch": 4.900813047378818, "total_flos": 4676846410434539520, "step": 1218200 }, { "loss": 2.46, "learning_rate": 1.991718409707549e-07, "epoch": 4.901215346923012, "total_flos": 4677229393489981440, "step": 1218300 }, { "loss": 2.64, "learning_rate": 1.983607155748243e-07, "epoch": 4.901617646467207, "total_flos": 4677609874950328320, "step": 1218400 }, { "loss": 2.62, "learning_rate": 1.9754959017889372e-07, "epoch": 4.902019946011401, "total_flos": 4678001164788633600, "step": 1218500 }, { "loss": 2.59, "learning_rate": 1.9673846478296312e-07, "epoch": 4.902422245555596, "total_flos": 4678397181632532480, "step": 1218600 }, { "loss": 2.615, "learning_rate": 1.9592733938703256e-07, "epoch": 4.90282454509979, "total_flos": 4678769467846103040, "step": 1218700 }, { "loss": 2.61, "learning_rate": 1.9511621399110197e-07, "epoch": 4.903226844643985, "total_flos": 4679159169622978560, "step": 1218800 }, { "loss": 2.615, "learning_rate": 1.943050885951714e-07, "epoch": 4.90362914418818, "total_flos": 4679531036248412160, "step": 1218900 }, { "loss": 2.56, "learning_rate": 1.934939631992408e-07, "epoch": 4.904031443732374, "total_flos": 4679902201789870080, "step": 1219000 }, { "loss": 2.66, "learning_rate": 1.9268283780331021e-07, "epoch": 4.904433743276569, "total_flos": 4680280447217233920, "step": 1219100 }, { "loss": 2.655, "learning_rate": 1.9187171240737962e-07, "epoch": 4.904836042820763, "total_flos": 4680662761056153600, "step": 1219200 }, { "loss": 2.63, "learning_rate": 1.9106058701144906e-07, "epoch": 4.905238342364958, "total_flos": 4681065693137448960, "step": 1219300 }, { "loss": 2.67, "learning_rate": 1.9024946161551846e-07, "epoch": 4.9056406419091525, "total_flos": 4681455134663454720, "step": 1219400 }, { "loss": 2.63, "learning_rate": 1.8943833621958787e-07, "epoch": 4.9060429414533475, "total_flos": 4681833836857651200, "step": 1219500 }, { "loss": 2.715, "learning_rate": 1.886272108236573e-07, "epoch": 4.906445240997542, "total_flos": 4682197911890718720, "step": 1219600 }, { "loss": 2.65, "learning_rate": 1.8781608542772671e-07, "epoch": 4.906847540541737, "total_flos": 4682579391864606720, "step": 1219700 }, { "loss": 2.64, "learning_rate": 1.8700496003179612e-07, "epoch": 4.907249840085932, "total_flos": 4682969417627258880, "step": 1219800 }, { "loss": 2.655, "learning_rate": 1.8619383463586556e-07, "epoch": 4.907652139630126, "total_flos": 4683343674311700480, "step": 1219900 }, { "loss": 2.635, "learning_rate": 1.8538270923993496e-07, "epoch": 4.90805443917432, "total_flos": 4683733503558389760, "step": 1220000 }, { "loss": 2.635, "learning_rate": 1.8457158384400437e-07, "epoch": 4.908456738718515, "total_flos": 4684106262472519680, "step": 1220100 }, { "loss": 2.69, "learning_rate": 1.8376045844807378e-07, "epoch": 4.90885903826271, "total_flos": 4684490174995353600, "step": 1220200 }, { "loss": 2.615, "learning_rate": 1.829493330521432e-07, "epoch": 4.909261337806904, "total_flos": 4684877709785395200, "step": 1220300 }, { "loss": 2.65, "learning_rate": 1.8213820765621265e-07, "epoch": 4.909663637351099, "total_flos": 4685258462119096320, "step": 1220400 }, { "loss": 2.625, "learning_rate": 1.8132708226028205e-07, "epoch": 4.910065936895293, "total_flos": 4685668962720583680, "step": 1220500 }, { "loss": 2.605, "learning_rate": 1.8051595686435146e-07, "epoch": 4.910468236439488, "total_flos": 4686057575692800000, "step": 1220600 }, { "loss": 2.64, "learning_rate": 1.7970483146842087e-07, "epoch": 4.910870535983682, "total_flos": 4686457544100925440, "step": 1220700 }, { "loss": 2.6, "learning_rate": 1.7889370607249028e-07, "epoch": 4.911272835527877, "total_flos": 4686826452364431360, "step": 1220800 }, { "loss": 2.675, "learning_rate": 1.7808258067655968e-07, "epoch": 4.9116751350720715, "total_flos": 4687200709048872960, "step": 1220900 }, { "loss": 2.58, "learning_rate": 1.7727145528062914e-07, "epoch": 4.912077434616267, "total_flos": 4687573903484866560, "step": 1221000 }, { "loss": 2.625, "learning_rate": 1.7646032988469855e-07, "epoch": 4.912479734160462, "total_flos": 4687952950909808640, "step": 1221100 }, { "loss": 2.61, "learning_rate": 1.7564920448876796e-07, "epoch": 4.912882033704656, "total_flos": 4688343385638113280, "step": 1221200 }, { "loss": 2.58, "learning_rate": 1.7483807909283737e-07, "epoch": 4.913284333248851, "total_flos": 4688713133077893120, "step": 1221300 }, { "loss": 2.77, "learning_rate": 1.7402695369690678e-07, "epoch": 4.913686632793045, "total_flos": 4689093619849482240, "step": 1221400 }, { "loss": 2.685, "learning_rate": 1.7321582830097618e-07, "epoch": 4.91408893233724, "total_flos": 4689475030777221120, "step": 1221500 }, { "loss": 2.665, "learning_rate": 1.7240470290504564e-07, "epoch": 4.914491231881434, "total_flos": 4689873559838699520, "step": 1221600 }, { "loss": 2.585, "learning_rate": 1.7159357750911505e-07, "epoch": 4.914893531425629, "total_flos": 4690270596441108480, "step": 1221700 }, { "loss": 2.585, "learning_rate": 1.7078245211318446e-07, "epoch": 4.915295830969823, "total_flos": 4690637890086973440, "step": 1221800 }, { "loss": 2.695, "learning_rate": 1.6997132671725387e-07, "epoch": 4.915698130514018, "total_flos": 4691018987651420160, "step": 1221900 }, { "loss": 2.625, "learning_rate": 1.6916020132132327e-07, "epoch": 4.916100430058213, "total_flos": 4691400632273817600, "step": 1222000 }, { "loss": 2.63, "learning_rate": 1.6834907592539268e-07, "epoch": 4.916502729602407, "total_flos": 4691788459182182400, "step": 1222100 }, { "loss": 2.595, "learning_rate": 1.6753795052946214e-07, "epoch": 4.916905029146602, "total_flos": 4692168494498181120, "step": 1222200 }, { "loss": 2.645, "learning_rate": 1.6672682513353155e-07, "epoch": 4.9173073286907965, "total_flos": 4692564691924316160, "step": 1222300 }, { "loss": 2.725, "learning_rate": 1.6591569973760096e-07, "epoch": 4.9177096282349915, "total_flos": 4692941211197952000, "step": 1222400 }, { "loss": 2.565, "learning_rate": 1.6510457434167036e-07, "epoch": 4.918111927779186, "total_flos": 4693324502305443840, "step": 1222500 }, { "loss": 2.605, "learning_rate": 1.6429344894573977e-07, "epoch": 4.918514227323381, "total_flos": 4693720487281889280, "step": 1222600 }, { "loss": 2.615, "learning_rate": 1.6348232354980918e-07, "epoch": 4.918916526867575, "total_flos": 4694108500083732480, "step": 1222700 }, { "loss": 2.66, "learning_rate": 1.6267119815387864e-07, "epoch": 4.91931882641177, "total_flos": 4694478369682083840, "step": 1222800 }, { "loss": 2.655, "learning_rate": 1.6186007275794805e-07, "epoch": 4.919721125955965, "total_flos": 4694861926351687680, "step": 1222900 }, { "loss": 2.635, "learning_rate": 1.6104894736201746e-07, "epoch": 4.920123425500159, "total_flos": 4695234998629109760, "step": 1223000 }, { "loss": 2.64, "learning_rate": 1.6023782196608686e-07, "epoch": 4.920525725044353, "total_flos": 4695617567407656960, "step": 1223100 }, { "loss": 2.565, "learning_rate": 1.5942669657015627e-07, "epoch": 4.920928024588548, "total_flos": 4696012240507269120, "step": 1223200 }, { "loss": 2.635, "learning_rate": 1.5861557117422568e-07, "epoch": 4.921330324132743, "total_flos": 4696377813310648320, "step": 1223300 }, { "loss": 2.675, "learning_rate": 1.578044457782951e-07, "epoch": 4.921732623676937, "total_flos": 4696761152219320320, "step": 1223400 }, { "loss": 2.67, "learning_rate": 1.5699332038236452e-07, "epoch": 4.922134923221132, "total_flos": 4697137666181713920, "step": 1223500 }, { "loss": 2.565, "learning_rate": 1.5618219498643395e-07, "epoch": 4.922537222765326, "total_flos": 4697527272356229120, "step": 1223600 }, { "loss": 2.645, "learning_rate": 1.5537106959050336e-07, "epoch": 4.922939522309521, "total_flos": 4697902277925826560, "step": 1223700 }, { "loss": 2.67, "learning_rate": 1.5455994419457277e-07, "epoch": 4.9233418218537155, "total_flos": 4698289833960837120, "step": 1223800 }, { "loss": 2.66, "learning_rate": 1.537488187986422e-07, "epoch": 4.9237441213979105, "total_flos": 4698674197939261440, "step": 1223900 }, { "loss": 2.545, "learning_rate": 1.529376934027116e-07, "epoch": 4.924146420942105, "total_flos": 4699078070110433280, "step": 1224000 }, { "loss": 2.645, "learning_rate": 1.5212656800678102e-07, "epoch": 4.9245487204863, "total_flos": 4699451822226862080, "step": 1224100 }, { "loss": 2.585, "learning_rate": 1.5131544261085043e-07, "epoch": 4.924951020030495, "total_flos": 4699846330677964800, "step": 1224200 }, { "loss": 2.64, "learning_rate": 1.5050431721491986e-07, "epoch": 4.925353319574689, "total_flos": 4700226227901665280, "step": 1224300 }, { "loss": 2.495, "learning_rate": 1.4969319181898927e-07, "epoch": 4.925755619118884, "total_flos": 4700613019117793280, "step": 1224400 }, { "loss": 2.595, "learning_rate": 1.4888206642305868e-07, "epoch": 4.926157918663078, "total_flos": 4701012424534241280, "step": 1224500 }, { "loss": 2.62, "learning_rate": 1.480709410271281e-07, "epoch": 4.926560218207273, "total_flos": 4701395859045273600, "step": 1224600 }, { "loss": 2.63, "learning_rate": 1.4725981563119752e-07, "epoch": 4.926962517751467, "total_flos": 4701786745229168640, "step": 1224700 }, { "loss": 2.635, "learning_rate": 1.4644869023526693e-07, "epoch": 4.927364817295662, "total_flos": 4702151569147392000, "step": 1224800 }, { "loss": 2.575, "learning_rate": 1.4563756483933633e-07, "epoch": 4.927767116839856, "total_flos": 4702546125399674880, "step": 1224900 }, { "loss": 2.61, "learning_rate": 1.4482643944340577e-07, "epoch": 4.928169416384051, "total_flos": 4702929708625489920, "step": 1225000 }, { "loss": 2.605, "learning_rate": 1.4401531404747517e-07, "epoch": 4.928571715928246, "total_flos": 4703316319259381760, "step": 1225100 }, { "loss": 2.57, "learning_rate": 1.4320418865154458e-07, "epoch": 4.92897401547244, "total_flos": 4703707370091786240, "step": 1225200 }, { "loss": 2.625, "learning_rate": 1.4239306325561402e-07, "epoch": 4.929376315016635, "total_flos": 4704095234178846720, "step": 1225300 }, { "loss": 2.51, "learning_rate": 1.4158193785968342e-07, "epoch": 4.9297786145608296, "total_flos": 4704480596670812160, "step": 1225400 }, { "loss": 2.62, "learning_rate": 1.4077081246375283e-07, "epoch": 4.930180914105025, "total_flos": 4704878525561917440, "step": 1225500 }, { "loss": 2.665, "learning_rate": 1.3995968706782227e-07, "epoch": 4.930583213649219, "total_flos": 4705263739339100160, "step": 1225600 }, { "loss": 2.63, "learning_rate": 1.3914856167189167e-07, "epoch": 4.930985513193414, "total_flos": 4705638829888573440, "step": 1225700 }, { "loss": 2.69, "learning_rate": 1.3833743627596108e-07, "epoch": 4.931387812737608, "total_flos": 4706019709692088320, "step": 1225800 }, { "loss": 2.65, "learning_rate": 1.3752631088003051e-07, "epoch": 4.931790112281803, "total_flos": 4706413453324308480, "step": 1225900 }, { "loss": 2.565, "learning_rate": 1.3671518548409992e-07, "epoch": 4.932192411825998, "total_flos": 4706789011263098880, "step": 1226000 }, { "loss": 2.585, "learning_rate": 1.3590406008816933e-07, "epoch": 4.932594711370192, "total_flos": 4707170448747048960, "step": 1226100 }, { "loss": 2.61, "learning_rate": 1.3509293469223876e-07, "epoch": 4.932997010914386, "total_flos": 4707543611315589120, "step": 1226200 }, { "loss": 2.59, "learning_rate": 1.3428180929630817e-07, "epoch": 4.933399310458581, "total_flos": 4707931687852339200, "step": 1226300 }, { "loss": 2.675, "learning_rate": 1.3347068390037758e-07, "epoch": 4.933801610002776, "total_flos": 4708314017624985600, "step": 1226400 }, { "loss": 2.66, "learning_rate": 1.3265955850444701e-07, "epoch": 4.93420390954697, "total_flos": 4708706922080931840, "step": 1226500 }, { "loss": 2.58, "learning_rate": 1.3184843310851642e-07, "epoch": 4.934606209091165, "total_flos": 4709094579029544960, "step": 1226600 }, { "loss": 2.625, "learning_rate": 1.3103730771258583e-07, "epoch": 4.9350085086353594, "total_flos": 4709469552731688960, "step": 1226700 }, { "loss": 2.605, "learning_rate": 1.3022618231665526e-07, "epoch": 4.9354108081795545, "total_flos": 4709851919683031040, "step": 1226800 }, { "loss": 2.585, "learning_rate": 1.2941505692072467e-07, "epoch": 4.935813107723749, "total_flos": 4710206216719134720, "step": 1226900 }, { "loss": 2.545, "learning_rate": 1.2860393152479408e-07, "epoch": 4.936215407267944, "total_flos": 4710583962889728000, "step": 1227000 }, { "loss": 2.645, "learning_rate": 1.277928061288635e-07, "epoch": 4.936617706812138, "total_flos": 4710940448157634560, "step": 1227100 }, { "loss": 2.58, "learning_rate": 1.2698168073293292e-07, "epoch": 4.937020006356333, "total_flos": 4711326331151339520, "step": 1227200 }, { "loss": 2.675, "learning_rate": 1.2617055533700233e-07, "epoch": 4.937422305900528, "total_flos": 4711695589956833280, "step": 1227300 }, { "loss": 2.615, "learning_rate": 1.2535942994107176e-07, "epoch": 4.937824605444722, "total_flos": 4712083268150415360, "step": 1227400 }, { "loss": 2.665, "learning_rate": 1.2454830454514117e-07, "epoch": 4.938226904988917, "total_flos": 4712453387377152000, "step": 1227500 }, { "loss": 2.58, "learning_rate": 1.2373717914921058e-07, "epoch": 4.938629204533111, "total_flos": 4712834952330915840, "step": 1227600 }, { "loss": 2.605, "learning_rate": 1.2292605375328e-07, "epoch": 4.939031504077306, "total_flos": 4713227431887482880, "step": 1227700 }, { "loss": 2.615, "learning_rate": 1.2211492835734942e-07, "epoch": 4.9394338036215, "total_flos": 4713604269835653120, "step": 1227800 }, { "loss": 2.665, "learning_rate": 1.2130380296141883e-07, "epoch": 4.939836103165695, "total_flos": 4713982090363637760, "step": 1227900 }, { "loss": 2.615, "learning_rate": 1.2049267756548826e-07, "epoch": 4.940238402709889, "total_flos": 4714374920462192640, "step": 1228000 }, { "loss": 2.64, "learning_rate": 1.1968155216955767e-07, "epoch": 4.940640702254084, "total_flos": 4714769237708574720, "step": 1228100 }, { "loss": 2.585, "learning_rate": 1.1887042677362708e-07, "epoch": 4.941043001798279, "total_flos": 4715157006193274880, "step": 1228200 }, { "loss": 2.57, "learning_rate": 1.180593013776965e-07, "epoch": 4.9414453013424735, "total_flos": 4715540637220270080, "step": 1228300 }, { "loss": 2.64, "learning_rate": 1.172481759817659e-07, "epoch": 4.9418476008866685, "total_flos": 4715916163291607040, "step": 1228400 }, { "loss": 2.69, "learning_rate": 1.1643705058583532e-07, "epoch": 4.942249900430863, "total_flos": 4716304122981027840, "step": 1228500 }, { "loss": 2.575, "learning_rate": 1.1562592518990475e-07, "epoch": 4.942652199975058, "total_flos": 4716673838553354240, "step": 1228600 }, { "loss": 2.6, "learning_rate": 1.1481479979397415e-07, "epoch": 4.943054499519252, "total_flos": 4717065372708802560, "step": 1228700 }, { "loss": 2.705, "learning_rate": 1.1400367439804357e-07, "epoch": 4.943456799063447, "total_flos": 4717438556522311680, "step": 1228800 }, { "loss": 2.6, "learning_rate": 1.13192549002113e-07, "epoch": 4.943859098607641, "total_flos": 4717827551903969280, "step": 1228900 }, { "loss": 2.65, "learning_rate": 1.123814236061824e-07, "epoch": 4.944261398151836, "total_flos": 4718212569165189120, "step": 1229000 }, { "loss": 2.615, "learning_rate": 1.1157029821025181e-07, "epoch": 4.944663697696031, "total_flos": 4718595026407649280, "step": 1229100 }, { "loss": 2.655, "learning_rate": 1.1075917281432124e-07, "epoch": 4.945065997240225, "total_flos": 4718984489178624000, "step": 1229200 }, { "loss": 2.675, "learning_rate": 1.0994804741839065e-07, "epoch": 4.94546829678442, "total_flos": 4719372167372206080, "step": 1229300 }, { "loss": 2.54, "learning_rate": 1.0913692202246006e-07, "epoch": 4.945870596328614, "total_flos": 4719773697285550080, "step": 1229400 }, { "loss": 2.61, "learning_rate": 1.083257966265295e-07, "epoch": 4.946272895872809, "total_flos": 4720148548829122560, "step": 1229500 }, { "loss": 2.635, "learning_rate": 1.075146712305989e-07, "epoch": 4.946675195417003, "total_flos": 4720523740292198400, "step": 1229600 }, { "loss": 2.67, "learning_rate": 1.0670354583466831e-07, "epoch": 4.947077494961198, "total_flos": 4720913394267893760, "step": 1229700 }, { "loss": 2.645, "learning_rate": 1.0589242043873774e-07, "epoch": 4.9474797945053925, "total_flos": 4721308072678748160, "step": 1229800 }, { "loss": 2.66, "learning_rate": 1.0508129504280715e-07, "epoch": 4.947882094049588, "total_flos": 4721688086749777920, "step": 1229900 }, { "loss": 2.665, "learning_rate": 1.0427016964687656e-07, "epoch": 4.948284393593782, "total_flos": 4722070103159132160, "step": 1230000 }, { "loss": 2.6, "learning_rate": 1.0345904425094599e-07, "epoch": 4.948686693137977, "total_flos": 4722466837020733440, "step": 1230100 }, { "loss": 2.555, "learning_rate": 1.026479188550154e-07, "epoch": 4.949088992682171, "total_flos": 4722843297870704640, "step": 1230200 }, { "loss": 2.635, "learning_rate": 1.0183679345908481e-07, "epoch": 4.949491292226366, "total_flos": 4723228697541365760, "step": 1230300 }, { "loss": 2.61, "learning_rate": 1.0102566806315424e-07, "epoch": 4.949893591770561, "total_flos": 4723601785752514560, "step": 1230400 }, { "loss": 2.59, "learning_rate": 1.0021454266722365e-07, "epoch": 4.950295891314755, "total_flos": 4723985321177149440, "step": 1230500 }, { "loss": 2.655, "learning_rate": 9.940341727129306e-08, "epoch": 4.95069819085895, "total_flos": 4724375782461665280, "step": 1230600 }, { "loss": 2.655, "learning_rate": 9.859229187536248e-08, "epoch": 4.951100490403144, "total_flos": 4724749561134305280, "step": 1230700 }, { "loss": 2.635, "learning_rate": 9.77811664794319e-08, "epoch": 4.951502789947339, "total_flos": 4725126579664711680, "step": 1230800 }, { "loss": 2.59, "learning_rate": 9.69700410835013e-08, "epoch": 4.951905089491533, "total_flos": 4725517603940904960, "step": 1230900 }, { "loss": 2.59, "learning_rate": 9.615891568757073e-08, "epoch": 4.952307389035728, "total_flos": 4725903810920386560, "step": 1231000 }, { "loss": 2.675, "learning_rate": 9.534779029164015e-08, "epoch": 4.952709688579922, "total_flos": 4726282205062533120, "step": 1231100 }, { "loss": 2.58, "learning_rate": 9.453666489570956e-08, "epoch": 4.9531119881241175, "total_flos": 4726664858820956160, "step": 1231200 }, { "loss": 2.61, "learning_rate": 9.372553949977898e-08, "epoch": 4.9535142876683125, "total_flos": 4727053376190812160, "step": 1231300 }, { "loss": 2.645, "learning_rate": 9.291441410384838e-08, "epoch": 4.953916587212507, "total_flos": 4727439158270914560, "step": 1231400 }, { "loss": 2.65, "learning_rate": 9.21032887079178e-08, "epoch": 4.954318886756702, "total_flos": 4727837697954877440, "step": 1231500 }, { "loss": 2.61, "learning_rate": 9.129216331198723e-08, "epoch": 4.954721186300896, "total_flos": 4728226528688025600, "step": 1231600 }, { "loss": 2.665, "learning_rate": 9.048103791605663e-08, "epoch": 4.955123485845091, "total_flos": 4728613882895831040, "step": 1231700 }, { "loss": 2.59, "learning_rate": 8.966991252012605e-08, "epoch": 4.955525785389285, "total_flos": 4729004121108172800, "step": 1231800 }, { "loss": 2.695, "learning_rate": 8.885878712419547e-08, "epoch": 4.95592808493348, "total_flos": 4729377288987955200, "step": 1231900 }, { "loss": 2.585, "learning_rate": 8.804766172826488e-08, "epoch": 4.956330384477674, "total_flos": 4729778250598379520, "step": 1232000 }, { "loss": 2.585, "learning_rate": 8.723653633233432e-08, "epoch": 4.956732684021869, "total_flos": 4730168950888796160, "step": 1232100 }, { "loss": 2.58, "learning_rate": 8.642541093640372e-08, "epoch": 4.957134983566064, "total_flos": 4730533498622423040, "step": 1232200 }, { "loss": 2.71, "learning_rate": 8.561428554047313e-08, "epoch": 4.957537283110258, "total_flos": 4730903585981706240, "step": 1232300 }, { "loss": 2.66, "learning_rate": 8.480316014454257e-08, "epoch": 4.957939582654453, "total_flos": 4731286643394539520, "step": 1232400 }, { "loss": 2.56, "learning_rate": 8.399203474861197e-08, "epoch": 4.958341882198647, "total_flos": 4731680126775889920, "step": 1232500 }, { "loss": 2.61, "learning_rate": 8.318090935268138e-08, "epoch": 4.958744181742842, "total_flos": 4732066928614502400, "step": 1232600 }, { "loss": 2.655, "learning_rate": 8.236978395675082e-08, "epoch": 4.9591464812870365, "total_flos": 4732449035314974720, "step": 1232700 }, { "loss": 2.625, "learning_rate": 8.155865856082022e-08, "epoch": 4.9595487808312315, "total_flos": 4732817067223511040, "step": 1232800 }, { "loss": 2.56, "learning_rate": 8.074753316488963e-08, "epoch": 4.959951080375426, "total_flos": 4733190612201492480, "step": 1232900 }, { "loss": 2.67, "learning_rate": 7.993640776895905e-08, "epoch": 4.960353379919621, "total_flos": 4733562468204441600, "step": 1233000 }, { "loss": 2.77, "learning_rate": 7.912528237302847e-08, "epoch": 4.960755679463815, "total_flos": 4733951304248832000, "step": 1233100 }, { "loss": 2.63, "learning_rate": 7.831415697709788e-08, "epoch": 4.96115797900801, "total_flos": 4734337585585704960, "step": 1233200 }, { "loss": 2.675, "learning_rate": 7.75030315811673e-08, "epoch": 4.961560278552204, "total_flos": 4734722778117918720, "step": 1233300 }, { "loss": 2.71, "learning_rate": 7.669190618523672e-08, "epoch": 4.961962578096399, "total_flos": 4735101751185469440, "step": 1233400 }, { "loss": 2.705, "learning_rate": 7.588078078930613e-08, "epoch": 4.962364877640594, "total_flos": 4735482296380723200, "step": 1233500 }, { "loss": 2.69, "learning_rate": 7.506965539337555e-08, "epoch": 4.962767177184788, "total_flos": 4735861991777218560, "step": 1233600 }, { "loss": 2.575, "learning_rate": 7.425852999744496e-08, "epoch": 4.963169476728983, "total_flos": 4736231850753085440, "step": 1233700 }, { "loss": 2.565, "learning_rate": 7.344740460151438e-08, "epoch": 4.963571776273177, "total_flos": 4736611052204052480, "step": 1233800 }, { "loss": 2.615, "learning_rate": 7.263627920558379e-08, "epoch": 4.963974075817372, "total_flos": 4737003988527452160, "step": 1233900 }, { "loss": 2.575, "learning_rate": 7.182515380965321e-08, "epoch": 4.964376375361566, "total_flos": 4737385872155750400, "step": 1234000 }, { "loss": 2.54, "learning_rate": 7.101402841372263e-08, "epoch": 4.964778674905761, "total_flos": 4737763432432865280, "step": 1234100 }, { "loss": 2.65, "learning_rate": 7.020290301779204e-08, "epoch": 4.9651809744499555, "total_flos": 4738152512794398720, "step": 1234200 }, { "loss": 2.685, "learning_rate": 6.939177762186146e-08, "epoch": 4.965583273994151, "total_flos": 4738544052261089280, "step": 1234300 }, { "loss": 2.71, "learning_rate": 6.858065222593088e-08, "epoch": 4.965985573538346, "total_flos": 4738927869181562880, "step": 1234400 }, { "loss": 2.535, "learning_rate": 6.776952683000028e-08, "epoch": 4.96638787308254, "total_flos": 4739327476425216000, "step": 1234500 }, { "loss": 2.585, "learning_rate": 6.69584014340697e-08, "epoch": 4.966790172626735, "total_flos": 4739715276777369600, "step": 1234600 }, { "loss": 2.615, "learning_rate": 6.614727603813913e-08, "epoch": 4.967192472170929, "total_flos": 4740110040168099840, "step": 1234700 }, { "loss": 2.675, "learning_rate": 6.533615064220853e-08, "epoch": 4.967594771715124, "total_flos": 4740496698603171840, "step": 1234800 }, { "loss": 2.575, "learning_rate": 6.452502524627795e-08, "epoch": 4.967997071259318, "total_flos": 4740888556744396800, "step": 1234900 }, { "loss": 2.595, "learning_rate": 6.371389985034738e-08, "epoch": 4.968399370803513, "total_flos": 4741256503673057280, "step": 1235000 }, { "loss": 2.66, "learning_rate": 6.290277445441678e-08, "epoch": 4.968801670347707, "total_flos": 4741655563858759680, "step": 1235100 }, { "loss": 2.635, "learning_rate": 6.20916490584862e-08, "epoch": 4.969203969891902, "total_flos": 4742024084401582080, "step": 1235200 }, { "loss": 2.61, "learning_rate": 6.128052366255562e-08, "epoch": 4.969606269436097, "total_flos": 4742399472380620800, "step": 1235300 }, { "loss": 2.695, "learning_rate": 6.046939826662503e-08, "epoch": 4.970008568980291, "total_flos": 4742790385120727040, "step": 1235400 }, { "loss": 2.615, "learning_rate": 5.965827287069445e-08, "epoch": 4.970410868524486, "total_flos": 4743145770961489920, "step": 1235500 }, { "loss": 2.605, "learning_rate": 5.884714747476387e-08, "epoch": 4.9708131680686805, "total_flos": 4743526193998172160, "step": 1235600 }, { "loss": 2.625, "learning_rate": 5.803602207883328e-08, "epoch": 4.9712154676128755, "total_flos": 4743907381853736960, "step": 1235700 }, { "loss": 2.59, "learning_rate": 5.7224896682902696e-08, "epoch": 4.97161776715707, "total_flos": 4744295543370362880, "step": 1235800 }, { "loss": 2.655, "learning_rate": 5.641377128697212e-08, "epoch": 4.972020066701265, "total_flos": 4744701391323648000, "step": 1235900 }, { "loss": 2.62, "learning_rate": 5.5602645891041524e-08, "epoch": 4.972422366245459, "total_flos": 4745086095221575680, "step": 1236000 }, { "loss": 2.61, "learning_rate": 5.4791520495110945e-08, "epoch": 4.972824665789654, "total_flos": 4745472615564349440, "step": 1236100 }, { "loss": 2.575, "learning_rate": 5.3980395099180366e-08, "epoch": 4.973226965333848, "total_flos": 4745868542117130240, "step": 1236200 }, { "loss": 2.575, "learning_rate": 5.3169269703249774e-08, "epoch": 4.973629264878043, "total_flos": 4746262652225064960, "step": 1236300 }, { "loss": 2.64, "learning_rate": 5.2358144307319195e-08, "epoch": 4.974031564422237, "total_flos": 4746641635915100160, "step": 1236400 }, { "loss": 2.57, "learning_rate": 5.1547018911388615e-08, "epoch": 4.974433863966432, "total_flos": 4747010799118233600, "step": 1236500 }, { "loss": 2.575, "learning_rate": 5.073589351545802e-08, "epoch": 4.974836163510627, "total_flos": 4747393856531066880, "step": 1236600 }, { "loss": 2.525, "learning_rate": 4.9924768119527444e-08, "epoch": 4.975238463054821, "total_flos": 4747785958989434880, "step": 1236700 }, { "loss": 2.61, "learning_rate": 4.911364272359686e-08, "epoch": 4.975640762599016, "total_flos": 4748173026390159360, "step": 1236800 }, { "loss": 2.63, "learning_rate": 4.830251732766627e-08, "epoch": 4.97604306214321, "total_flos": 4748554697568768000, "step": 1236900 }, { "loss": 2.635, "learning_rate": 4.749139193173569e-08, "epoch": 4.976445361687405, "total_flos": 4748917747532083200, "step": 1237000 }, { "loss": 2.6, "learning_rate": 4.668026653580511e-08, "epoch": 4.9768476612315995, "total_flos": 4749293417006960640, "step": 1237100 }, { "loss": 2.665, "learning_rate": 4.586914113987452e-08, "epoch": 4.9772499607757945, "total_flos": 4749673946268487680, "step": 1237200 }, { "loss": 2.635, "learning_rate": 4.5058015743943936e-08, "epoch": 4.977652260319989, "total_flos": 4750033820108943360, "step": 1237300 }, { "loss": 2.645, "learning_rate": 4.424689034801336e-08, "epoch": 4.978054559864184, "total_flos": 4750418157531156480, "step": 1237400 }, { "loss": 2.6, "learning_rate": 4.3435764952082764e-08, "epoch": 4.978456859408379, "total_flos": 4750808432922193920, "step": 1237500 }, { "loss": 2.68, "learning_rate": 4.2624639556152185e-08, "epoch": 4.978859158952573, "total_flos": 4751187347566080000, "step": 1237600 }, { "loss": 2.64, "learning_rate": 4.1813514160221606e-08, "epoch": 4.979261458496768, "total_flos": 4751563590655119360, "step": 1237700 }, { "loss": 2.665, "learning_rate": 4.1002388764291014e-08, "epoch": 4.979663758040962, "total_flos": 4751944778510684160, "step": 1237800 }, { "loss": 2.695, "learning_rate": 4.0191263368360435e-08, "epoch": 4.980066057585157, "total_flos": 4752320580766617600, "step": 1237900 }, { "loss": 2.63, "learning_rate": 3.9380137972429855e-08, "epoch": 4.980468357129351, "total_flos": 4752702251945226240, "step": 1238000 }, { "loss": 2.635, "learning_rate": 3.856901257649927e-08, "epoch": 4.980870656673546, "total_flos": 4753092527336263680, "step": 1238100 }, { "loss": 2.66, "learning_rate": 3.7757887180568684e-08, "epoch": 4.98127295621774, "total_flos": 4753490233155194880, "step": 1238200 }, { "loss": 2.495, "learning_rate": 3.69467617846381e-08, "epoch": 4.981675255761935, "total_flos": 4753879802151014400, "step": 1238300 }, { "loss": 2.665, "learning_rate": 3.613563638870751e-08, "epoch": 4.98207755530613, "total_flos": 4754280561934233600, "step": 1238400 }, { "loss": 2.575, "learning_rate": 3.5324510992776927e-08, "epoch": 4.982479854850324, "total_flos": 4754644015551959040, "step": 1238500 }, { "loss": 2.61, "learning_rate": 3.451338559684635e-08, "epoch": 4.982882154394519, "total_flos": 4755027529731624960, "step": 1238600 }, { "loss": 2.62, "learning_rate": 3.370226020091576e-08, "epoch": 4.9832844539387136, "total_flos": 4755404707599298560, "step": 1238700 }, { "loss": 2.62, "learning_rate": 3.2891134804985176e-08, "epoch": 4.983686753482909, "total_flos": 4755783181410078720, "step": 1238800 }, { "loss": 2.625, "learning_rate": 3.2080009409054597e-08, "epoch": 4.984089053027103, "total_flos": 4756167157667819520, "step": 1238900 }, { "loss": 2.62, "learning_rate": 3.126888401312401e-08, "epoch": 4.984491352571298, "total_flos": 4756552764476928000, "step": 1239000 }, { "loss": 2.58, "learning_rate": 3.045775861719343e-08, "epoch": 4.984893652115492, "total_flos": 4756935418235351040, "step": 1239100 }, { "loss": 2.705, "learning_rate": 2.9646633221262843e-08, "epoch": 4.985295951659687, "total_flos": 4757316266171412480, "step": 1239200 }, { "loss": 2.645, "learning_rate": 2.883550782533226e-08, "epoch": 4.985698251203882, "total_flos": 4757692865113681920, "step": 1239300 }, { "loss": 2.59, "learning_rate": 2.8024382429401678e-08, "epoch": 4.986100550748076, "total_flos": 4758078455989063680, "step": 1239400 }, { "loss": 2.64, "learning_rate": 2.7213257033471092e-08, "epoch": 4.98650285029227, "total_flos": 4758457524658974720, "step": 1239500 }, { "loss": 2.66, "learning_rate": 2.6402131637540506e-08, "epoch": 4.986905149836465, "total_flos": 4758835345186959360, "step": 1239600 }, { "loss": 2.565, "learning_rate": 2.5591006241609927e-08, "epoch": 4.98730744938066, "total_flos": 4759215954117120000, "step": 1239700 }, { "loss": 2.61, "learning_rate": 2.477988084567934e-08, "epoch": 4.987709748924854, "total_flos": 4759600764239892480, "step": 1239800 }, { "loss": 2.63, "learning_rate": 2.3968755449748756e-08, "epoch": 4.988112048469049, "total_flos": 4759987003086827520, "step": 1239900 }, { "loss": 2.665, "learning_rate": 2.3157630053818173e-08, "epoch": 4.9885143480132434, "total_flos": 4760363129328537600, "step": 1240000 }, { "loss": 2.62, "learning_rate": 2.2346504657887587e-08, "epoch": 4.9889166475574385, "total_flos": 4760749867432243200, "step": 1240100 }, { "loss": 2.7, "learning_rate": 2.1535379261957e-08, "epoch": 4.989318947101633, "total_flos": 4761114245206118400, "step": 1240200 }, { "loss": 2.605, "learning_rate": 2.0724253866026422e-08, "epoch": 4.989721246645828, "total_flos": 4761495996053360640, "step": 1240300 }, { "loss": 2.675, "learning_rate": 1.9913128470095837e-08, "epoch": 4.990123546190022, "total_flos": 4761878479852032000, "step": 1240400 }, { "loss": 2.56, "learning_rate": 1.9102003074165254e-08, "epoch": 4.990525845734217, "total_flos": 4762261505397411840, "step": 1240500 }, { "loss": 2.73, "learning_rate": 1.829087767823467e-08, "epoch": 4.990928145278412, "total_flos": 4762629516060979200, "step": 1240600 }, { "loss": 2.65, "learning_rate": 1.7479752282304083e-08, "epoch": 4.991330444822606, "total_flos": 4763023089733447680, "step": 1240700 }, { "loss": 2.575, "learning_rate": 1.66686268863735e-08, "epoch": 4.991732744366801, "total_flos": 4763411203448893440, "step": 1240800 }, { "loss": 2.555, "learning_rate": 1.5857501490442918e-08, "epoch": 4.992135043910995, "total_flos": 4763783733979607040, "step": 1240900 }, { "loss": 2.66, "learning_rate": 1.5046376094512332e-08, "epoch": 4.99253734345519, "total_flos": 4764166520519086080, "step": 1241000 }, { "loss": 2.665, "learning_rate": 1.4235250698581748e-08, "epoch": 4.992939642999384, "total_flos": 4764551394376765440, "step": 1241100 }, { "loss": 2.545, "learning_rate": 1.3424125302651165e-08, "epoch": 4.993341942543579, "total_flos": 4764931100395745280, "step": 1241200 }, { "loss": 2.665, "learning_rate": 1.261299990672058e-08, "epoch": 4.993744242087773, "total_flos": 4765310073463296000, "step": 1241300 }, { "loss": 2.615, "learning_rate": 1.1801874510789995e-08, "epoch": 4.994146541631968, "total_flos": 4765702600821043200, "step": 1241400 }, { "loss": 2.525, "learning_rate": 1.0990749114859413e-08, "epoch": 4.994548841176163, "total_flos": 4766076236090142720, "step": 1241500 }, { "loss": 2.555, "learning_rate": 1.0179623718928829e-08, "epoch": 4.9949511407203575, "total_flos": 4766462198752481280, "step": 1241600 }, { "loss": 2.635, "learning_rate": 9.368498322998245e-09, "epoch": 4.9953534402645525, "total_flos": 4766858438668554240, "step": 1241700 }, { "loss": 2.52, "learning_rate": 8.55737292706766e-09, "epoch": 4.995755739808747, "total_flos": 4767238473984552960, "step": 1241800 }, { "loss": 2.7, "learning_rate": 7.746247531137077e-09, "epoch": 4.996158039352942, "total_flos": 4767616724723159040, "step": 1241900 }, { "loss": 2.58, "learning_rate": 6.935122135206493e-09, "epoch": 4.996560338897136, "total_flos": 4768010776407429120, "step": 1242000 }, { "loss": 2.63, "learning_rate": 6.123996739275909e-09, "epoch": 4.996962638441331, "total_flos": 4768384374497832960, "step": 1242100 }, { "loss": 2.63, "learning_rate": 5.312871343345324e-09, "epoch": 4.997364937985525, "total_flos": 4768770719569612800, "step": 1242200 }, { "loss": 2.62, "learning_rate": 4.501745947414741e-09, "epoch": 4.99776723752972, "total_flos": 4769160835623383040, "step": 1242300 }, { "loss": 2.665, "learning_rate": 3.690620551484157e-09, "epoch": 4.998169537073915, "total_flos": 4769533259929251840, "step": 1242400 }, { "loss": 2.62, "learning_rate": 2.8794951555535727e-09, "epoch": 4.998571836618109, "total_flos": 4769916450123141120, "step": 1242500 }, { "loss": 2.59, "learning_rate": 2.068369759622989e-09, "epoch": 4.998974136162303, "total_flos": 4770300697254236160, "step": 1242600 }, { "loss": 2.63, "learning_rate": 1.2572443636924053e-09, "epoch": 4.999376435706498, "total_flos": 4770684137076510720, "step": 1242700 }, { "loss": 2.6, "learning_rate": 4.461189677618212e-10, "epoch": 4.999778735250693, "total_flos": 4771073185570590720, "step": 1242800 } ]