{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.647834234848162, "global_step": 500001, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "FLOPS loss": 5.392782622948289e-05, "L0_d": 15092.36, "MLM loss": 8.760412216186523, "epoch": 0.01, "step": 499 }, { "epoch": 0.01, "learning_rate": 5e-06, "loss": 9.6927, "step": 500 }, { "FLOPS loss": 0.0008161979494616389, "L0_d": 27461.33, "MLM loss": 7.4083781242370605, "epoch": 0.02, "step": 999 }, { "epoch": 0.02, "learning_rate": 1e-05, "loss": 7.9871, "step": 1000 }, { "FLOPS loss": 0.0027657910250127316, "L0_d": 29453.2, "MLM loss": 6.897146224975586, "epoch": 0.03, "step": 1499 }, { "epoch": 0.03, "learning_rate": 1.5e-05, "loss": 7.0651, "step": 1500 }, { "FLOPS loss": 0.005620635114610195, "L0_d": 30316.53, "MLM loss": 6.6747026443481445, "epoch": 0.04, "step": 1999 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 6.7723, "step": 2000 }, { "FLOPS loss": 0.010232314467430115, "L0_d": 30685.58, "MLM loss": 6.474103927612305, "epoch": 0.05, "step": 2499 }, { "epoch": 0.05, "learning_rate": 2.5e-05, "loss": 6.615, "step": 2500 }, { "FLOPS loss": 0.01670052483677864, "L0_d": 31151.2, "MLM loss": 6.448917388916016, "epoch": 0.06, "step": 2999 }, { "epoch": 0.06, "learning_rate": 3e-05, "loss": 6.487, "step": 3000 }, { "FLOPS loss": 0.022464273497462273, "L0_d": 30915.17, "MLM loss": 5.781277179718018, "epoch": 0.07, "step": 3499 }, { "epoch": 0.07, "learning_rate": 3.5e-05, "loss": 6.1663, "step": 3500 }, { "FLOPS loss": 0.029723184183239937, "L0_d": 30589.05, "MLM loss": 5.28702449798584, "epoch": 0.09, "step": 3999 }, { "epoch": 0.09, "learning_rate": 4e-05, "loss": 5.5923, "step": 4000 }, { "FLOPS loss": 0.0326639786362648, "L0_d": 28512.73, "MLM loss": 4.979520797729492, "epoch": 0.1, "step": 4499 }, { "epoch": 0.1, "learning_rate": 4.5e-05, "loss": 5.0652, "step": 4500 }, { "FLOPS loss": 0.03443261235952377, "L0_d": 25869.95, "MLM loss": 4.665562629699707, "epoch": 0.11, "step": 4999 }, { "epoch": 0.11, "learning_rate": 5e-05, "loss": 4.7192, "step": 5000 }, { "FLOPS loss": 0.03417317196726799, "L0_d": 23801.28, "MLM loss": 4.246782302856445, "epoch": 0.12, "step": 5499 }, { "epoch": 0.12, "learning_rate": 5.500000000000001e-05, "loss": 4.4723, "step": 5500 }, { "FLOPS loss": 0.031081825494766235, "L0_d": 19494.3, "MLM loss": 4.069106101989746, "epoch": 0.13, "step": 5999 }, { "epoch": 0.13, "learning_rate": 6e-05, "loss": 4.2738, "step": 6000 }, { "FLOPS loss": 0.028846798464655876, "L0_d": 16482.78, "MLM loss": 3.9512133598327637, "epoch": 0.14, "step": 6499 }, { "epoch": 0.14, "learning_rate": 6.500000000000001e-05, "loss": 4.1219, "step": 6500 }, { "FLOPS loss": 0.02576448954641819, "L0_d": 13962.62, "MLM loss": 3.9717659950256348, "epoch": 0.15, "step": 6999 }, { "epoch": 0.15, "learning_rate": 7e-05, "loss": 4.0019, "step": 7000 }, { "FLOPS loss": 0.03054131008684635, "L0_d": 14436.58, "MLM loss": 3.9484663009643555, "epoch": 0.16, "step": 7499 }, { "epoch": 0.16, "learning_rate": 7.500000000000001e-05, "loss": 3.8882, "step": 7500 }, { "FLOPS loss": 0.027292482554912567, "L0_d": 12377.77, "MLM loss": 3.9403085708618164, "epoch": 0.17, "step": 7999 }, { "epoch": 0.17, "learning_rate": 8e-05, "loss": 3.7931, "step": 8000 }, { "FLOPS loss": 0.02762710675597191, "L0_d": 10414.95, "MLM loss": 3.5601937770843506, "epoch": 0.18, "step": 8499 }, { "epoch": 0.18, "learning_rate": 8.5e-05, "loss": 3.6941, "step": 8500 }, { "FLOPS loss": 0.02418929897248745, "L0_d": 8721.27, "MLM loss": 3.5979204177856445, "epoch": 0.19, "step": 8999 }, { "epoch": 0.19, "learning_rate": 9e-05, "loss": 3.5589, "step": 9000 }, { "FLOPS loss": 0.02961421199142933, "L0_d": 9450.05, "MLM loss": 3.2906453609466553, "epoch": 0.2, "step": 9499 }, { "epoch": 0.2, "learning_rate": 9.5e-05, "loss": 3.4316, "step": 9500 }, { "FLOPS loss": 0.02749692089855671, "L0_d": 7999.8, "MLM loss": 3.2580208778381348, "epoch": 0.21, "step": 9999 }, { "epoch": 0.21, "learning_rate": 9.999000000000001e-05, "loss": 3.334, "step": 10000 }, { "FLOPS loss": 0.02673165127635002, "L0_d": 7029.53, "MLM loss": 3.250258684158325, "epoch": 0.22, "step": 10499 }, { "epoch": 0.22, "learning_rate": 9.989816326530613e-05, "loss": 3.2474, "step": 10500 }, { "FLOPS loss": 0.027633745223283768, "L0_d": 7096.09, "MLM loss": 3.3386759757995605, "epoch": 0.23, "step": 10999 }, { "epoch": 0.23, "learning_rate": 9.97961224489796e-05, "loss": 3.1775, "step": 11000 }, { "FLOPS loss": 0.026252513751387596, "L0_d": 6960.83, "MLM loss": 3.018740177154541, "epoch": 0.24, "step": 11499 }, { "epoch": 0.24, "learning_rate": 9.969408163265307e-05, "loss": 3.119, "step": 11500 }, { "FLOPS loss": 0.018239092081785202, "L0_d": 5148.5, "MLM loss": 3.0682997703552246, "epoch": 0.26, "step": 11999 }, { "epoch": 0.26, "learning_rate": 9.959204081632653e-05, "loss": 3.0649, "step": 12000 }, { "FLOPS loss": 0.023073602467775345, "L0_d": 5836.48, "MLM loss": 3.1717731952667236, "epoch": 0.27, "step": 12499 }, { "epoch": 0.27, "learning_rate": 9.949020408163265e-05, "loss": 3.017, "step": 12500 }, { "FLOPS loss": 0.024667827412486076, "L0_d": 5631.75, "MLM loss": 2.7986183166503906, "epoch": 0.28, "step": 12999 }, { "epoch": 0.28, "learning_rate": 9.938816326530612e-05, "loss": 2.9808, "step": 13000 }, { "FLOPS loss": 0.023629698902368546, "L0_d": 5088.77, "MLM loss": 2.7454609870910645, "epoch": 0.29, "step": 13499 }, { "epoch": 0.29, "learning_rate": 9.92861224489796e-05, "loss": 2.9405, "step": 13500 }, { "FLOPS loss": 0.024097440764307976, "L0_d": 5203.92, "MLM loss": 2.75523042678833, "epoch": 0.3, "step": 13999 }, { "epoch": 0.3, "learning_rate": 9.918408163265308e-05, "loss": 2.9051, "step": 14000 }, { "FLOPS loss": 0.03014693595468998, "L0_d": 5187.09, "MLM loss": 2.7530577182769775, "epoch": 0.31, "step": 14499 }, { "epoch": 0.31, "learning_rate": 9.908224489795918e-05, "loss": 2.8747, "step": 14500 }, { "FLOPS loss": 0.024567320942878723, "L0_d": 4308.94, "MLM loss": 2.730717182159424, "epoch": 0.32, "step": 14999 }, { "epoch": 0.32, "learning_rate": 9.898020408163266e-05, "loss": 2.8491, "step": 15000 }, { "FLOPS loss": 0.025582976639270782, "L0_d": 5050.28, "MLM loss": 3.0116822719573975, "epoch": 0.33, "step": 15499 }, { "epoch": 0.33, "learning_rate": 9.887816326530613e-05, "loss": 2.824, "step": 15500 }, { "FLOPS loss": 0.02788759395480156, "L0_d": 4436.11, "MLM loss": 2.500883102416992, "epoch": 0.34, "step": 15999 }, { "epoch": 0.34, "learning_rate": 9.877612244897959e-05, "loss": 2.7988, "step": 16000 }, { "FLOPS loss": 0.033916205167770386, "L0_d": 4878.62, "MLM loss": 2.69112229347229, "epoch": 0.35, "step": 16499 }, { "epoch": 0.35, "learning_rate": 9.867428571428572e-05, "loss": 2.7721, "step": 16500 }, { "FLOPS loss": 0.026273198425769806, "L0_d": 4141.28, "MLM loss": 2.696350336074829, "epoch": 0.36, "step": 16999 }, { "epoch": 0.36, "learning_rate": 9.857224489795919e-05, "loss": 2.7496, "step": 17000 }, { "FLOPS loss": 0.02965918742120266, "L0_d": 3847.95, "MLM loss": 2.7344136238098145, "epoch": 0.37, "step": 17499 }, { "epoch": 0.37, "learning_rate": 9.847020408163265e-05, "loss": 2.7262, "step": 17500 }, { "FLOPS loss": 0.027983248233795166, "L0_d": 3737.28, "MLM loss": 2.6528053283691406, "epoch": 0.38, "step": 17999 }, { "epoch": 0.38, "learning_rate": 9.836816326530612e-05, "loss": 2.7069, "step": 18000 }, { "FLOPS loss": 0.03850917890667915, "L0_d": 4032.44, "MLM loss": 2.837846279144287, "epoch": 0.39, "step": 18499 }, { "epoch": 0.39, "learning_rate": 9.826632653061225e-05, "loss": 2.6941, "step": 18500 }, { "FLOPS loss": 0.03680583834648132, "L0_d": 4449.95, "MLM loss": 2.7556967735290527, "epoch": 0.4, "step": 18999 }, { "epoch": 0.4, "learning_rate": 9.816428571428572e-05, "loss": 2.6743, "step": 19000 }, { "FLOPS loss": 0.027939151972532272, "L0_d": 2543.42, "MLM loss": 2.738865375518799, "epoch": 0.42, "step": 19499 }, { "epoch": 0.42, "learning_rate": 9.806224489795918e-05, "loss": 2.6548, "step": 19500 }, { "FLOPS loss": 0.03628528118133545, "L0_d": 3611.06, "MLM loss": 2.7977004051208496, "epoch": 0.43, "step": 19999 }, { "epoch": 0.43, "learning_rate": 9.796020408163266e-05, "loss": 2.6434, "step": 20000 }, { "FLOPS loss": 0.0318717323243618, "L0_d": 3071.55, "MLM loss": 2.655547618865967, "epoch": 0.44, "step": 20499 }, { "epoch": 0.44, "learning_rate": 9.785836734693878e-05, "loss": 2.6301, "step": 20500 }, { "FLOPS loss": 0.039533913135528564, "L0_d": 3564.52, "MLM loss": 2.4931273460388184, "epoch": 0.45, "step": 20999 }, { "epoch": 0.45, "learning_rate": 9.77565306122449e-05, "loss": 2.6197, "step": 21000 }, { "FLOPS loss": 0.03401036560535431, "L0_d": 3066.97, "MLM loss": 2.598534107208252, "epoch": 0.46, "step": 21499 }, { "epoch": 0.46, "learning_rate": 9.765448979591837e-05, "loss": 2.6015, "step": 21500 }, { "FLOPS loss": 0.032060861587524414, "L0_d": 2761.09, "MLM loss": 2.5144989490509033, "epoch": 0.47, "step": 21999 }, { "epoch": 0.47, "learning_rate": 9.755244897959183e-05, "loss": 2.5921, "step": 22000 }, { "FLOPS loss": 0.03773064538836479, "L0_d": 2863.8, "MLM loss": 2.4851295948028564, "epoch": 0.48, "step": 22499 }, { "epoch": 0.48, "learning_rate": 9.745040816326531e-05, "loss": 2.5806, "step": 22500 }, { "FLOPS loss": 0.03615275397896767, "L0_d": 2681.7, "MLM loss": 2.422513484954834, "epoch": 0.49, "step": 22999 }, { "epoch": 0.49, "learning_rate": 9.734836734693879e-05, "loss": 2.5702, "step": 23000 }, { "FLOPS loss": 0.04175760596990585, "L0_d": 2752.3, "MLM loss": 2.5466079711914062, "epoch": 0.5, "step": 23499 }, { "epoch": 0.5, "learning_rate": 9.724632653061225e-05, "loss": 2.5621, "step": 23500 }, { "FLOPS loss": 0.04358026012778282, "L0_d": 3140.41, "MLM loss": 2.4925756454467773, "epoch": 0.51, "step": 23999 }, { "epoch": 0.51, "learning_rate": 9.714428571428572e-05, "loss": 2.5497, "step": 24000 }, { "FLOPS loss": 0.03997480496764183, "L0_d": 2636.91, "MLM loss": 2.4403679370880127, "epoch": 0.52, "step": 24499 }, { "epoch": 0.52, "learning_rate": 9.70422448979592e-05, "loss": 2.5387, "step": 24500 }, { "FLOPS loss": 0.04443227872252464, "L0_d": 2863.91, "MLM loss": 2.702801465988159, "epoch": 0.53, "step": 24999 }, { "epoch": 0.53, "learning_rate": 9.694061224489797e-05, "loss": 2.5299, "step": 25000 }, { "FLOPS loss": 0.03630707040429115, "L0_d": 1869.66, "MLM loss": 2.500286102294922, "epoch": 0.54, "step": 25499 }, { "epoch": 0.54, "learning_rate": 9.683857142857144e-05, "loss": 2.5238, "step": 25500 }, { "FLOPS loss": 0.045446157455444336, "L0_d": 2295.62, "MLM loss": 2.675072193145752, "epoch": 0.55, "step": 25999 }, { "epoch": 0.55, "learning_rate": 9.67365306122449e-05, "loss": 2.5185, "step": 26000 }, { "FLOPS loss": 0.03888548165559769, "L0_d": 2242.59, "MLM loss": 2.5834720134735107, "epoch": 0.56, "step": 26499 }, { "epoch": 0.56, "learning_rate": 9.663448979591837e-05, "loss": 2.5038, "step": 26500 }, { "FLOPS loss": 0.04285123944282532, "L0_d": 2071.3, "MLM loss": 2.2703781127929688, "epoch": 0.57, "step": 26999 }, { "epoch": 0.57, "learning_rate": 9.653244897959184e-05, "loss": 2.5001, "step": 27000 }, { "FLOPS loss": 0.04465385898947716, "L0_d": 2078.45, "MLM loss": 2.442018985748291, "epoch": 0.59, "step": 27499 }, { "epoch": 0.59, "learning_rate": 9.643040816326531e-05, "loss": 2.4859, "step": 27500 }, { "FLOPS loss": 0.05151209235191345, "L0_d": 2542.83, "MLM loss": 2.4004836082458496, "epoch": 0.6, "step": 27999 }, { "epoch": 0.6, "learning_rate": 9.632836734693877e-05, "loss": 2.4844, "step": 28000 }, { "FLOPS loss": 0.04146797955036163, "L0_d": 2260.97, "MLM loss": 2.3301563262939453, "epoch": 0.61, "step": 28499 }, { "epoch": 0.61, "learning_rate": 9.622653061224491e-05, "loss": 2.4795, "step": 28500 }, { "FLOPS loss": 0.040446631610393524, "L0_d": 1544.88, "MLM loss": 2.348306179046631, "epoch": 0.62, "step": 28999 }, { "epoch": 0.62, "learning_rate": 9.612448979591837e-05, "loss": 2.4705, "step": 29000 }, { "FLOPS loss": 0.048661969602108, "L0_d": 1644.22, "MLM loss": 2.352621555328369, "epoch": 0.63, "step": 29499 }, { "epoch": 0.63, "learning_rate": 9.602244897959184e-05, "loss": 2.4636, "step": 29500 }, { "FLOPS loss": 0.04659537598490715, "L0_d": 2116.88, "MLM loss": 2.5809438228607178, "epoch": 0.64, "step": 29999 }, { "epoch": 0.64, "learning_rate": 9.59204081632653e-05, "loss": 2.4569, "step": 30000 }, { "FLOPS loss": 0.04385443776845932, "L0_d": 1462.5, "MLM loss": 2.521130323410034, "epoch": 0.65, "step": 30499 }, { "epoch": 0.65, "learning_rate": 9.581836734693878e-05, "loss": 2.4556, "step": 30500 }, { "FLOPS loss": 0.05597566068172455, "L0_d": 1834.39, "MLM loss": 2.3147106170654297, "epoch": 0.66, "step": 30999 }, { "epoch": 0.66, "learning_rate": 9.57165306122449e-05, "loss": 2.4486, "step": 31000 }, { "FLOPS loss": 0.05618233606219292, "L0_d": 1604.23, "MLM loss": 2.6163783073425293, "epoch": 0.67, "step": 31499 }, { "epoch": 0.67, "learning_rate": 9.561448979591837e-05, "loss": 2.4394, "step": 31500 }, { "FLOPS loss": 0.056978415697813034, "L0_d": 1700.08, "MLM loss": 2.268538236618042, "epoch": 0.68, "step": 31999 }, { "epoch": 0.68, "learning_rate": 9.551244897959184e-05, "loss": 2.4394, "step": 32000 }, { "FLOPS loss": 0.05571691691875458, "L0_d": 1859.88, "MLM loss": 2.3873777389526367, "epoch": 0.69, "step": 32499 }, { "epoch": 0.69, "learning_rate": 9.541040816326531e-05, "loss": 2.4321, "step": 32500 }, { "FLOPS loss": 0.058303989470005035, "L0_d": 1647.94, "MLM loss": 2.3315484523773193, "epoch": 0.7, "step": 32999 }, { "epoch": 0.7, "learning_rate": 9.530857142857144e-05, "loss": 2.4282, "step": 33000 }, { "FLOPS loss": 0.05903186276555061, "L0_d": 1568.08, "MLM loss": 2.4503281116485596, "epoch": 0.71, "step": 33499 }, { "epoch": 0.71, "learning_rate": 9.520653061224491e-05, "loss": 2.4228, "step": 33500 }, { "FLOPS loss": 0.05316786840558052, "L0_d": 1538.89, "MLM loss": 2.356748104095459, "epoch": 0.72, "step": 33999 }, { "epoch": 0.72, "learning_rate": 9.510448979591837e-05, "loss": 2.4234, "step": 34000 }, { "FLOPS loss": 0.053176648914813995, "L0_d": 1371.69, "MLM loss": 2.461960554122925, "epoch": 0.73, "step": 34499 }, { "epoch": 0.73, "learning_rate": 9.500244897959184e-05, "loss": 2.4175, "step": 34500 }, { "FLOPS loss": 0.05513223633170128, "L0_d": 1471.97, "MLM loss": 2.52297306060791, "epoch": 0.75, "step": 34999 }, { "epoch": 0.75, "learning_rate": 9.490040816326531e-05, "loss": 2.4137, "step": 35000 }, { "FLOPS loss": 0.06698770076036453, "L0_d": 1671.09, "MLM loss": 2.314728021621704, "epoch": 0.76, "step": 35499 }, { "epoch": 0.76, "learning_rate": 9.479857142857144e-05, "loss": 2.4086, "step": 35500 }, { "FLOPS loss": 0.054937928915023804, "L0_d": 1281.67, "MLM loss": 2.2821147441864014, "epoch": 0.77, "step": 35999 }, { "epoch": 0.77, "learning_rate": 9.46965306122449e-05, "loss": 2.4029, "step": 36000 }, { "FLOPS loss": 0.06599493324756622, "L0_d": 1534.23, "MLM loss": 2.372270345687866, "epoch": 0.78, "step": 36499 }, { "epoch": 0.78, "learning_rate": 9.459448979591838e-05, "loss": 2.3998, "step": 36500 }, { "FLOPS loss": 0.0572284571826458, "L0_d": 1141.72, "MLM loss": 2.2006995677948, "epoch": 0.79, "step": 36999 }, { "epoch": 0.79, "learning_rate": 9.449244897959184e-05, "loss": 2.3925, "step": 37000 }, { "FLOPS loss": 0.0742960199713707, "L0_d": 1721.53, "MLM loss": 2.2286112308502197, "epoch": 0.8, "step": 37499 }, { "epoch": 0.8, "learning_rate": 9.439061224489796e-05, "loss": 2.3937, "step": 37500 }, { "FLOPS loss": 0.057512883096933365, "L0_d": 916.3, "MLM loss": 2.375272750854492, "epoch": 0.81, "step": 37999 }, { "epoch": 0.81, "learning_rate": 9.428857142857144e-05, "loss": 2.3879, "step": 38000 }, { "FLOPS loss": 0.07384462654590607, "L0_d": 1570.23, "MLM loss": 2.320944309234619, "epoch": 0.82, "step": 38499 }, { "epoch": 0.82, "learning_rate": 9.41865306122449e-05, "loss": 2.386, "step": 38500 }, { "FLOPS loss": 0.05838833004236221, "L0_d": 976.95, "MLM loss": 2.330425262451172, "epoch": 0.83, "step": 38999 }, { "epoch": 0.83, "learning_rate": 9.408448979591837e-05, "loss": 2.3854, "step": 39000 }, { "FLOPS loss": 0.06257197260856628, "L0_d": 1007.91, "MLM loss": 2.2275640964508057, "epoch": 0.84, "step": 39499 }, { "epoch": 0.84, "learning_rate": 9.398265306122449e-05, "loss": 2.3766, "step": 39500 }, { "FLOPS loss": 0.06444195657968521, "L0_d": 1197.8, "MLM loss": 2.4724812507629395, "epoch": 0.85, "step": 39999 }, { "epoch": 0.85, "learning_rate": 9.388061224489796e-05, "loss": 2.3743, "step": 40000 }, { "FLOPS loss": 0.07541827857494354, "L0_d": 1409.95, "MLM loss": 2.302579402923584, "epoch": 0.86, "step": 40499 }, { "epoch": 0.86, "learning_rate": 9.377857142857144e-05, "loss": 2.3769, "step": 40500 }, { "FLOPS loss": 0.08086178451776505, "L0_d": 1100.89, "MLM loss": 2.3238229751586914, "epoch": 0.87, "step": 40999 }, { "epoch": 0.87, "learning_rate": 9.36765306122449e-05, "loss": 2.3687, "step": 41000 }, { "FLOPS loss": 0.07856693863868713, "L0_d": 1270.05, "MLM loss": 2.2399253845214844, "epoch": 0.88, "step": 41499 }, { "epoch": 0.88, "learning_rate": 9.357448979591838e-05, "loss": 2.3666, "step": 41500 }, { "FLOPS loss": 0.07329129427671432, "L0_d": 1334.06, "MLM loss": 2.3387813568115234, "epoch": 0.89, "step": 41999 }, { "epoch": 0.89, "learning_rate": 9.347265306122449e-05, "loss": 2.3618, "step": 42000 }, { "FLOPS loss": 0.07289435714483261, "L0_d": 1140.52, "MLM loss": 2.252929449081421, "epoch": 0.91, "step": 42499 }, { "epoch": 0.91, "learning_rate": 9.337061224489796e-05, "loss": 2.3592, "step": 42500 }, { "FLOPS loss": 0.07385888695716858, "L0_d": 1153.45, "MLM loss": 2.245168924331665, "epoch": 0.92, "step": 42999 }, { "epoch": 0.92, "learning_rate": 9.326857142857144e-05, "loss": 2.3607, "step": 43000 }, { "FLOPS loss": 0.07996492087841034, "L0_d": 1155.66, "MLM loss": 2.101062536239624, "epoch": 0.93, "step": 43499 }, { "epoch": 0.93, "learning_rate": 9.31665306122449e-05, "loss": 2.3536, "step": 43500 }, { "FLOPS loss": 0.07473205775022507, "L0_d": 1201.67, "MLM loss": 2.2482810020446777, "epoch": 0.94, "step": 43999 }, { "epoch": 0.94, "learning_rate": 9.306469387755103e-05, "loss": 2.3592, "step": 44000 }, { "FLOPS loss": 0.0637981966137886, "L0_d": 694.08, "MLM loss": 2.2862000465393066, "epoch": 0.95, "step": 44499 }, { "epoch": 0.95, "learning_rate": 9.296265306122449e-05, "loss": 2.3556, "step": 44500 }, { "FLOPS loss": 0.07323095202445984, "L0_d": 1020.36, "MLM loss": 2.3087589740753174, "epoch": 0.96, "step": 44999 }, { "epoch": 0.96, "learning_rate": 9.286061224489796e-05, "loss": 2.3506, "step": 45000 }, { "FLOPS loss": 0.06839210540056229, "L0_d": 737.48, "MLM loss": 2.2693538665771484, "epoch": 0.97, "step": 45499 }, { "epoch": 0.97, "learning_rate": 9.275857142857143e-05, "loss": 2.3474, "step": 45500 }, { "FLOPS loss": 0.058115143328905106, "L0_d": 811.34, "MLM loss": 2.4099068641662598, "epoch": 0.98, "step": 45999 }, { "epoch": 0.98, "learning_rate": 9.265653061224491e-05, "loss": 2.3457, "step": 46000 }, { "FLOPS loss": 0.07992670685052872, "L0_d": 1060.0, "MLM loss": 2.431608200073242, "epoch": 0.99, "step": 46499 }, { "epoch": 0.99, "learning_rate": 9.255469387755102e-05, "loss": 2.3416, "step": 46500 }, { "FLOPS loss": 0.07746997475624084, "L0_d": 863.0, "MLM loss": 2.2116293907165527, "epoch": 1.0, "step": 46999 }, { "epoch": 1.0, "learning_rate": 9.24526530612245e-05, "loss": 2.3453, "step": 47000 }, { "FLOPS loss": 0.06712145358324051, "L0_d": 780.94, "MLM loss": 2.144688844680786, "epoch": 1.01, "step": 47499 }, { "epoch": 1.01, "learning_rate": 9.235061224489796e-05, "loss": 2.3361, "step": 47500 }, { "FLOPS loss": 0.07461294531822205, "L0_d": 843.0, "MLM loss": 2.273952007293701, "epoch": 1.02, "step": 47999 }, { "epoch": 1.02, "learning_rate": 9.224857142857143e-05, "loss": 2.339, "step": 48000 }, { "FLOPS loss": 0.06439612060785294, "L0_d": 755.41, "MLM loss": 2.1934714317321777, "epoch": 1.03, "step": 48499 }, { "epoch": 1.03, "learning_rate": 9.21465306122449e-05, "loss": 2.3308, "step": 48500 }, { "FLOPS loss": 0.08083415776491165, "L0_d": 686.83, "MLM loss": 2.324183940887451, "epoch": 1.04, "step": 48999 }, { "epoch": 1.04, "learning_rate": 9.204469387755103e-05, "loss": 2.3345, "step": 49000 }, { "FLOPS loss": 0.0857621356844902, "L0_d": 888.97, "MLM loss": 2.316976547241211, "epoch": 1.05, "step": 49499 }, { "epoch": 1.05, "learning_rate": 9.194265306122449e-05, "loss": 2.3313, "step": 49500 }, { "FLOPS loss": 0.10324012488126755, "L0_d": 1068.78, "MLM loss": 2.3727731704711914, "epoch": 1.06, "step": 49999 }, { "epoch": 1.06, "learning_rate": 9.184061224489796e-05, "loss": 2.3325, "step": 50000 }, { "FLOPS loss": 0.08102630078792572, "L0_d": 728.97, "MLM loss": 2.3263208866119385, "epoch": 1.08, "step": 50499 }, { "epoch": 1.08, "learning_rate": 9.173857142857143e-05, "loss": 2.328, "step": 50500 }, { "FLOPS loss": 0.07676102221012115, "L0_d": 721.47, "MLM loss": 2.250537395477295, "epoch": 1.09, "step": 50999 }, { "epoch": 1.09, "learning_rate": 9.16365306122449e-05, "loss": 2.3182, "step": 51000 }, { "FLOPS loss": 0.08790870010852814, "L0_d": 903.42, "MLM loss": 2.2065377235412598, "epoch": 1.1, "step": 51499 }, { "epoch": 1.1, "learning_rate": 9.153469387755102e-05, "loss": 2.3219, "step": 51500 }, { "FLOPS loss": 0.06780925393104553, "L0_d": 552.94, "MLM loss": 2.3474671840667725, "epoch": 1.11, "step": 51999 }, { "epoch": 1.11, "learning_rate": 9.14326530612245e-05, "loss": 2.3219, "step": 52000 }, { "FLOPS loss": 0.08725595474243164, "L0_d": 719.84, "MLM loss": 2.1877591609954834, "epoch": 1.12, "step": 52499 }, { "epoch": 1.12, "learning_rate": 9.133061224489796e-05, "loss": 2.3138, "step": 52500 }, { "FLOPS loss": 0.0755017101764679, "L0_d": 583.75, "MLM loss": 2.151200771331787, "epoch": 1.13, "step": 52999 }, { "epoch": 1.13, "learning_rate": 9.122857142857143e-05, "loss": 2.3121, "step": 53000 }, { "FLOPS loss": 0.08711685240268707, "L0_d": 698.66, "MLM loss": 2.409646987915039, "epoch": 1.14, "step": 53499 }, { "epoch": 1.14, "learning_rate": 9.112673469387756e-05, "loss": 2.3146, "step": 53500 }, { "FLOPS loss": 0.08219663053750992, "L0_d": 696.67, "MLM loss": 2.1099956035614014, "epoch": 1.15, "step": 53999 }, { "epoch": 1.15, "learning_rate": 9.102469387755103e-05, "loss": 2.3065, "step": 54000 }, { "FLOPS loss": 0.0765044316649437, "L0_d": 606.97, "MLM loss": 2.2134242057800293, "epoch": 1.16, "step": 54499 }, { "epoch": 1.16, "learning_rate": 9.092265306122449e-05, "loss": 2.3057, "step": 54500 }, { "FLOPS loss": 0.09635642915964127, "L0_d": 905.95, "MLM loss": 2.1616404056549072, "epoch": 1.17, "step": 54999 }, { "epoch": 1.17, "learning_rate": 9.082061224489796e-05, "loss": 2.3022, "step": 55000 }, { "FLOPS loss": 0.09710858017206192, "L0_d": 1261.88, "MLM loss": 2.2462549209594727, "epoch": 1.18, "step": 55499 }, { "epoch": 1.18, "learning_rate": 9.071877551020409e-05, "loss": 2.2989, "step": 55500 }, { "FLOPS loss": 0.0736132338643074, "L0_d": 562.47, "MLM loss": 2.2699902057647705, "epoch": 1.19, "step": 55999 }, { "epoch": 1.19, "learning_rate": 9.061673469387755e-05, "loss": 2.3008, "step": 56000 }, { "FLOPS loss": 0.07662803679704666, "L0_d": 789.95, "MLM loss": 2.221466064453125, "epoch": 1.2, "step": 56499 }, { "epoch": 1.2, "learning_rate": 9.051469387755103e-05, "loss": 2.2922, "step": 56500 }, { "FLOPS loss": 0.07387778162956238, "L0_d": 666.42, "MLM loss": 1.9749205112457275, "epoch": 1.21, "step": 56999 }, { "epoch": 1.21, "learning_rate": 9.04126530612245e-05, "loss": 2.2898, "step": 57000 }, { "FLOPS loss": 0.06573477387428284, "L0_d": 532.02, "MLM loss": 2.511476755142212, "epoch": 1.22, "step": 57499 }, { "epoch": 1.22, "learning_rate": 9.031061224489796e-05, "loss": 2.2877, "step": 57500 }, { "FLOPS loss": 0.07265348732471466, "L0_d": 901.97, "MLM loss": 2.1760964393615723, "epoch": 1.24, "step": 57999 }, { "epoch": 1.24, "learning_rate": 9.02087755102041e-05, "loss": 2.2906, "step": 58000 }, { "FLOPS loss": 0.08701298385858536, "L0_d": 941.17, "MLM loss": 2.3221704959869385, "epoch": 1.25, "step": 58499 }, { "epoch": 1.25, "learning_rate": 9.010673469387756e-05, "loss": 2.2837, "step": 58500 }, { "FLOPS loss": 0.07435303926467896, "L0_d": 884.02, "MLM loss": 2.2115917205810547, "epoch": 1.26, "step": 58999 }, { "epoch": 1.26, "learning_rate": 9.000469387755103e-05, "loss": 2.286, "step": 59000 }, { "FLOPS loss": 0.08176206052303314, "L0_d": 952.75, "MLM loss": 2.4079854488372803, "epoch": 1.27, "step": 59499 }, { "epoch": 1.27, "learning_rate": 8.990265306122449e-05, "loss": 2.2774, "step": 59500 }, { "FLOPS loss": 0.08938340097665787, "L0_d": 851.17, "MLM loss": 2.175987958908081, "epoch": 1.28, "step": 59999 }, { "epoch": 1.28, "learning_rate": 8.980081632653061e-05, "loss": 2.2775, "step": 60000 }, { "FLOPS loss": 0.09279684722423553, "L0_d": 1012.95, "MLM loss": 2.1451687812805176, "epoch": 1.29, "step": 60499 }, { "epoch": 1.29, "learning_rate": 8.969877551020408e-05, "loss": 2.2741, "step": 60500 }, { "FLOPS loss": 0.08175905793905258, "L0_d": 546.34, "MLM loss": 2.071819543838501, "epoch": 1.3, "step": 60999 }, { "epoch": 1.3, "learning_rate": 8.959673469387755e-05, "loss": 2.2753, "step": 61000 }, { "FLOPS loss": 0.07440430670976639, "L0_d": 629.91, "MLM loss": 2.1951260566711426, "epoch": 1.31, "step": 61499 }, { "epoch": 1.31, "learning_rate": 8.949469387755103e-05, "loss": 2.2723, "step": 61500 }, { "FLOPS loss": 0.07724060118198395, "L0_d": 933.22, "MLM loss": 2.193014621734619, "epoch": 1.32, "step": 61999 }, { "epoch": 1.32, "learning_rate": 8.939285714285714e-05, "loss": 2.2686, "step": 62000 }, { "FLOPS loss": 0.07582145184278488, "L0_d": 789.0, "MLM loss": 2.2125964164733887, "epoch": 1.33, "step": 62499 }, { "epoch": 1.33, "learning_rate": 8.929081632653062e-05, "loss": 2.2678, "step": 62500 }, { "FLOPS loss": 0.08202211558818817, "L0_d": 931.83, "MLM loss": 2.278818368911743, "epoch": 1.34, "step": 62999 }, { "epoch": 1.34, "learning_rate": 8.918877551020408e-05, "loss": 2.2621, "step": 63000 }, { "FLOPS loss": 0.08058538287878036, "L0_d": 656.52, "MLM loss": 2.1786534786224365, "epoch": 1.35, "step": 63499 }, { "epoch": 1.35, "learning_rate": 8.908673469387756e-05, "loss": 2.2596, "step": 63500 }, { "FLOPS loss": 0.09940757602453232, "L0_d": 681.25, "MLM loss": 2.147231101989746, "epoch": 1.36, "step": 63999 }, { "epoch": 1.36, "learning_rate": 8.898489795918368e-05, "loss": 2.2634, "step": 64000 }, { "FLOPS loss": 0.08605896681547165, "L0_d": 1384.86, "MLM loss": 2.206131935119629, "epoch": 1.37, "step": 64499 }, { "epoch": 1.37, "learning_rate": 8.888285714285715e-05, "loss": 2.2593, "step": 64500 }, { "FLOPS loss": 0.06299304962158203, "L0_d": 629.77, "MLM loss": 2.2180962562561035, "epoch": 1.38, "step": 64999 }, { "epoch": 1.38, "learning_rate": 8.878081632653061e-05, "loss": 2.2553, "step": 65000 }, { "FLOPS loss": 0.07634347677230835, "L0_d": 772.73, "MLM loss": 2.0626327991485596, "epoch": 1.39, "step": 65499 }, { "epoch": 1.39, "learning_rate": 8.867877551020408e-05, "loss": 2.2522, "step": 65500 }, { "FLOPS loss": 0.08463647216558456, "L0_d": 799.12, "MLM loss": 2.189845085144043, "epoch": 1.41, "step": 65999 }, { "epoch": 1.41, "learning_rate": 8.857673469387755e-05, "loss": 2.2519, "step": 66000 }, { "FLOPS loss": 0.0599059984087944, "L0_d": 554.03, "MLM loss": 2.150831937789917, "epoch": 1.42, "step": 66499 }, { "epoch": 1.42, "learning_rate": 8.847489795918367e-05, "loss": 2.2505, "step": 66500 }, { "FLOPS loss": 0.06849193572998047, "L0_d": 659.19, "MLM loss": 2.07958722114563, "epoch": 1.43, "step": 66999 }, { "epoch": 1.43, "learning_rate": 8.837285714285715e-05, "loss": 2.2467, "step": 67000 }, { "FLOPS loss": 0.07847350835800171, "L0_d": 1053.48, "MLM loss": 2.0575110912323, "epoch": 1.44, "step": 67499 }, { "epoch": 1.44, "learning_rate": 8.827081632653062e-05, "loss": 2.2458, "step": 67500 }, { "FLOPS loss": 0.07554128021001816, "L0_d": 573.47, "MLM loss": 2.2414090633392334, "epoch": 1.45, "step": 67999 }, { "epoch": 1.45, "learning_rate": 8.816877551020408e-05, "loss": 2.2414, "step": 68000 }, { "FLOPS loss": 0.07916034013032913, "L0_d": 806.33, "MLM loss": 2.220374584197998, "epoch": 1.46, "step": 68499 }, { "epoch": 1.46, "learning_rate": 8.806693877551022e-05, "loss": 2.2412, "step": 68500 }, { "FLOPS loss": 0.06981582939624786, "L0_d": 521.97, "MLM loss": 2.2153103351593018, "epoch": 1.47, "step": 68999 }, { "epoch": 1.47, "learning_rate": 8.796489795918368e-05, "loss": 2.2436, "step": 69000 }, { "FLOPS loss": 0.10114899277687073, "L0_d": 862.02, "MLM loss": 2.131760597229004, "epoch": 1.48, "step": 69499 }, { "epoch": 1.48, "learning_rate": 8.786285714285715e-05, "loss": 2.237, "step": 69500 }, { "FLOPS loss": 0.0677991583943367, "L0_d": 871.22, "MLM loss": 2.202712297439575, "epoch": 1.49, "step": 69999 }, { "epoch": 1.49, "learning_rate": 8.776081632653061e-05, "loss": 2.2359, "step": 70000 }, { "FLOPS loss": 0.06798284500837326, "L0_d": 805.94, "MLM loss": 2.167118787765503, "epoch": 1.5, "step": 70499 }, { "epoch": 1.5, "learning_rate": 8.765877551020409e-05, "loss": 2.2326, "step": 70500 }, { "FLOPS loss": 0.07358480244874954, "L0_d": 592.3, "MLM loss": 2.184145927429199, "epoch": 1.51, "step": 70999 }, { "epoch": 1.51, "learning_rate": 8.755693877551021e-05, "loss": 2.2309, "step": 71000 }, { "FLOPS loss": 0.05839638411998749, "L0_d": 504.52, "MLM loss": 2.011075019836426, "epoch": 1.52, "step": 71499 }, { "epoch": 1.52, "learning_rate": 8.745489795918367e-05, "loss": 2.2312, "step": 71500 }, { "FLOPS loss": 0.07558299601078033, "L0_d": 876.62, "MLM loss": 2.282175302505493, "epoch": 1.53, "step": 71999 }, { "epoch": 1.53, "learning_rate": 8.735285714285715e-05, "loss": 2.2261, "step": 72000 }, { "FLOPS loss": 0.07914137095212936, "L0_d": 748.42, "MLM loss": 2.159878969192505, "epoch": 1.54, "step": 72499 }, { "epoch": 1.54, "learning_rate": 8.725081632653062e-05, "loss": 2.2303, "step": 72500 }, { "FLOPS loss": 0.07840941846370697, "L0_d": 837.8, "MLM loss": 2.1828160285949707, "epoch": 1.55, "step": 72999 }, { "epoch": 1.55, "learning_rate": 8.714897959183674e-05, "loss": 2.2263, "step": 73000 }, { "FLOPS loss": 0.08968845009803772, "L0_d": 834.58, "MLM loss": 2.078836679458618, "epoch": 1.57, "step": 73499 }, { "epoch": 1.57, "learning_rate": 8.704693877551022e-05, "loss": 2.2241, "step": 73500 }, { "FLOPS loss": 0.0636233538389206, "L0_d": 760.53, "MLM loss": 2.119175672531128, "epoch": 1.58, "step": 73999 }, { "epoch": 1.58, "learning_rate": 8.694489795918368e-05, "loss": 2.2213, "step": 74000 }, { "FLOPS loss": 0.08825450390577316, "L0_d": 1082.2, "MLM loss": 2.0904204845428467, "epoch": 1.59, "step": 74499 }, { "epoch": 1.59, "learning_rate": 8.684285714285715e-05, "loss": 2.2189, "step": 74500 }, { "FLOPS loss": 0.07530651986598969, "L0_d": 793.81, "MLM loss": 2.1687419414520264, "epoch": 1.6, "step": 74999 }, { "epoch": 1.6, "learning_rate": 8.674102040816327e-05, "loss": 2.217, "step": 75000 }, { "FLOPS loss": 0.09109428524971008, "L0_d": 926.31, "MLM loss": 2.2190775871276855, "epoch": 1.61, "step": 75499 }, { "epoch": 1.61, "learning_rate": 8.663897959183674e-05, "loss": 2.2167, "step": 75500 }, { "FLOPS loss": 0.08965260535478592, "L0_d": 815.39, "MLM loss": 2.24111270904541, "epoch": 1.62, "step": 75999 }, { "epoch": 1.62, "learning_rate": 8.653693877551021e-05, "loss": 2.2158, "step": 76000 }, { "FLOPS loss": 0.08030744642019272, "L0_d": 710.55, "MLM loss": 2.096034049987793, "epoch": 1.63, "step": 76499 }, { "epoch": 1.63, "learning_rate": 8.643489795918369e-05, "loss": 2.2151, "step": 76500 }, { "FLOPS loss": 0.08471863716840744, "L0_d": 1109.98, "MLM loss": 2.1691243648529053, "epoch": 1.64, "step": 76999 }, { "epoch": 1.64, "learning_rate": 8.633285714285715e-05, "loss": 2.2155, "step": 77000 }, { "FLOPS loss": 0.0984283909201622, "L0_d": 1047.89, "MLM loss": 2.161064624786377, "epoch": 1.65, "step": 77499 }, { "epoch": 1.65, "learning_rate": 8.623102040816326e-05, "loss": 2.209, "step": 77500 }, { "FLOPS loss": 0.08178388327360153, "L0_d": 852.5, "MLM loss": 2.168562173843384, "epoch": 1.66, "step": 77999 }, { "epoch": 1.66, "learning_rate": 8.612897959183674e-05, "loss": 2.2078, "step": 78000 }, { "FLOPS loss": 0.09100330621004105, "L0_d": 744.31, "MLM loss": 2.0004794597625732, "epoch": 1.67, "step": 78499 }, { "epoch": 1.67, "learning_rate": 8.602693877551022e-05, "loss": 2.2079, "step": 78500 }, { "FLOPS loss": 0.07992446422576904, "L0_d": 707.66, "MLM loss": 2.0477890968322754, "epoch": 1.68, "step": 78999 }, { "epoch": 1.68, "learning_rate": 8.592489795918368e-05, "loss": 2.2054, "step": 79000 }, { "FLOPS loss": 0.07943347096443176, "L0_d": 889.62, "MLM loss": 2.190885305404663, "epoch": 1.69, "step": 79499 }, { "epoch": 1.69, "learning_rate": 8.58230612244898e-05, "loss": 2.1998, "step": 79500 }, { "FLOPS loss": 0.06888856738805771, "L0_d": 767.7, "MLM loss": 2.1345748901367188, "epoch": 1.7, "step": 79999 }, { "epoch": 1.7, "learning_rate": 8.572102040816327e-05, "loss": 2.2022, "step": 80000 }, { "FLOPS loss": 0.08001743257045746, "L0_d": 809.2, "MLM loss": 1.9894421100616455, "epoch": 1.71, "step": 80499 }, { "epoch": 1.71, "learning_rate": 8.561897959183673e-05, "loss": 2.204, "step": 80500 }, { "FLOPS loss": 0.06890819221735, "L0_d": 812.89, "MLM loss": 2.2208876609802246, "epoch": 1.72, "step": 80999 }, { "epoch": 1.72, "learning_rate": 8.551693877551021e-05, "loss": 2.1997, "step": 81000 }, { "FLOPS loss": 0.07821900397539139, "L0_d": 687.19, "MLM loss": 2.076125144958496, "epoch": 1.74, "step": 81499 }, { "epoch": 1.74, "learning_rate": 8.541510204081633e-05, "loss": 2.197, "step": 81500 }, { "FLOPS loss": 0.09430748969316483, "L0_d": 856.03, "MLM loss": 2.220088481903076, "epoch": 1.75, "step": 81999 }, { "epoch": 1.75, "learning_rate": 8.53130612244898e-05, "loss": 2.1946, "step": 82000 }, { "FLOPS loss": 0.0756566971540451, "L0_d": 759.97, "MLM loss": 1.9912607669830322, "epoch": 1.76, "step": 82499 }, { "epoch": 1.76, "learning_rate": 8.521102040816327e-05, "loss": 2.1977, "step": 82500 }, { "FLOPS loss": 0.07251645624637604, "L0_d": 630.61, "MLM loss": 2.082219362258911, "epoch": 1.77, "step": 82999 }, { "epoch": 1.77, "learning_rate": 8.510897959183674e-05, "loss": 2.1932, "step": 83000 }, { "FLOPS loss": 0.07821709662675858, "L0_d": 845.48, "MLM loss": 2.2003273963928223, "epoch": 1.78, "step": 83499 }, { "epoch": 1.78, "learning_rate": 8.50069387755102e-05, "loss": 2.1929, "step": 83500 }, { "FLOPS loss": 0.08727140724658966, "L0_d": 880.19, "MLM loss": 2.121065139770508, "epoch": 1.79, "step": 83999 }, { "epoch": 1.79, "learning_rate": 8.490510204081634e-05, "loss": 2.1897, "step": 84000 }, { "FLOPS loss": 0.07498069107532501, "L0_d": 1045.14, "MLM loss": 2.155193567276001, "epoch": 1.8, "step": 84499 }, { "epoch": 1.8, "learning_rate": 8.48030612244898e-05, "loss": 2.1856, "step": 84500 }, { "FLOPS loss": 0.075527124106884, "L0_d": 825.73, "MLM loss": 2.001152515411377, "epoch": 1.81, "step": 84999 }, { "epoch": 1.81, "learning_rate": 8.470102040816327e-05, "loss": 2.1876, "step": 85000 }, { "FLOPS loss": 0.06435954570770264, "L0_d": 629.23, "MLM loss": 2.0413312911987305, "epoch": 1.82, "step": 85499 }, { "epoch": 1.82, "learning_rate": 8.459897959183673e-05, "loss": 2.1867, "step": 85500 }, { "FLOPS loss": 0.09223916381597519, "L0_d": 719.11, "MLM loss": 2.1891777515411377, "epoch": 1.83, "step": 85999 }, { "epoch": 1.83, "learning_rate": 8.449714285714286e-05, "loss": 2.1867, "step": 86000 }, { "FLOPS loss": 0.06357712298631668, "L0_d": 667.89, "MLM loss": 2.057793140411377, "epoch": 1.84, "step": 86499 }, { "epoch": 1.84, "learning_rate": 8.439510204081633e-05, "loss": 2.1813, "step": 86500 }, { "FLOPS loss": 0.06833140552043915, "L0_d": 727.81, "MLM loss": 2.049499988555908, "epoch": 1.85, "step": 86999 }, { "epoch": 1.85, "learning_rate": 8.42930612244898e-05, "loss": 2.1804, "step": 87000 }, { "FLOPS loss": 0.06953922659158707, "L0_d": 712.0, "MLM loss": 1.9520695209503174, "epoch": 1.86, "step": 87499 }, { "epoch": 1.86, "learning_rate": 8.419102040816327e-05, "loss": 2.1804, "step": 87500 }, { "FLOPS loss": 0.05486714467406273, "L0_d": 557.69, "MLM loss": 2.089158058166504, "epoch": 1.87, "step": 87999 }, { "epoch": 1.87, "learning_rate": 8.408918367346939e-05, "loss": 2.1768, "step": 88000 }, { "FLOPS loss": 0.08773964643478394, "L0_d": 780.02, "MLM loss": 2.027557611465454, "epoch": 1.88, "step": 88499 }, { "epoch": 1.88, "learning_rate": 8.398714285714287e-05, "loss": 2.1738, "step": 88500 }, { "FLOPS loss": 0.07384505122900009, "L0_d": 974.44, "MLM loss": 2.0310699939727783, "epoch": 1.9, "step": 88999 }, { "epoch": 1.9, "learning_rate": 8.388510204081634e-05, "loss": 2.1804, "step": 89000 }, { "FLOPS loss": 0.07133940607309341, "L0_d": 786.97, "MLM loss": 2.072397232055664, "epoch": 1.91, "step": 89499 }, { "epoch": 1.91, "learning_rate": 8.37830612244898e-05, "loss": 2.1774, "step": 89500 }, { "FLOPS loss": 0.08639474958181381, "L0_d": 834.44, "MLM loss": 2.0521416664123535, "epoch": 1.92, "step": 89999 }, { "epoch": 1.92, "learning_rate": 8.368122448979592e-05, "loss": 2.1771, "step": 90000 }, { "FLOPS loss": 0.10411433130502701, "L0_d": 976.3, "MLM loss": 2.184450149536133, "epoch": 1.93, "step": 90499 }, { "epoch": 1.93, "learning_rate": 8.35791836734694e-05, "loss": 2.1769, "step": 90500 }, { "FLOPS loss": 0.07786352187395096, "L0_d": 972.97, "MLM loss": 2.1770880222320557, "epoch": 1.94, "step": 90999 }, { "epoch": 1.94, "learning_rate": 8.347714285714286e-05, "loss": 2.171, "step": 91000 }, { "FLOPS loss": 0.07419507950544357, "L0_d": 904.19, "MLM loss": 2.092041492462158, "epoch": 1.95, "step": 91499 }, { "epoch": 1.95, "learning_rate": 8.337510204081633e-05, "loss": 2.1724, "step": 91500 }, { "FLOPS loss": 0.08307668566703796, "L0_d": 829.2, "MLM loss": 2.167854070663452, "epoch": 1.96, "step": 91999 }, { "epoch": 1.96, "learning_rate": 8.327326530612245e-05, "loss": 2.1704, "step": 92000 }, { "FLOPS loss": 0.06959807127714157, "L0_d": 839.39, "MLM loss": 2.129701852798462, "epoch": 1.97, "step": 92499 }, { "epoch": 1.97, "learning_rate": 8.317122448979591e-05, "loss": 2.1663, "step": 92500 }, { "FLOPS loss": 0.062158744782209396, "L0_d": 598.09, "MLM loss": 1.981863260269165, "epoch": 1.98, "step": 92999 }, { "epoch": 1.98, "learning_rate": 8.306918367346939e-05, "loss": 2.1691, "step": 93000 }, { "FLOPS loss": 0.07570803910493851, "L0_d": 539.5, "MLM loss": 1.99395751953125, "epoch": 1.99, "step": 93499 }, { "epoch": 1.99, "learning_rate": 8.296714285714287e-05, "loss": 2.1685, "step": 93500 }, { "FLOPS loss": 0.0987701267004013, "L0_d": 1091.38, "MLM loss": 2.118992805480957, "epoch": 2.0, "step": 93999 }, { "epoch": 2.0, "learning_rate": 8.286551020408163e-05, "loss": 2.1647, "step": 94000 }, { "FLOPS loss": 0.06103205308318138, "L0_d": 586.62, "MLM loss": 2.1299803256988525, "epoch": 2.01, "step": 94499 }, { "epoch": 2.01, "learning_rate": 8.276346938775511e-05, "loss": 2.1608, "step": 94500 }, { "FLOPS loss": 0.09433767199516296, "L0_d": 764.81, "MLM loss": 2.127784013748169, "epoch": 2.02, "step": 94999 }, { "epoch": 2.02, "learning_rate": 8.266142857142858e-05, "loss": 2.1604, "step": 95000 }, { "FLOPS loss": 0.07245936244726181, "L0_d": 831.86, "MLM loss": 2.176636219024658, "epoch": 2.03, "step": 95499 }, { "epoch": 2.03, "learning_rate": 8.255938775510204e-05, "loss": 2.1592, "step": 95500 }, { "FLOPS loss": 0.059117577970027924, "L0_d": 507.25, "MLM loss": 1.9163801670074463, "epoch": 2.04, "step": 95999 }, { "epoch": 2.04, "learning_rate": 8.245734693877552e-05, "loss": 2.1556, "step": 96000 }, { "FLOPS loss": 0.07038378715515137, "L0_d": 1127.86, "MLM loss": 2.097640037536621, "epoch": 2.06, "step": 96499 }, { "epoch": 2.06, "learning_rate": 8.235530612244898e-05, "loss": 2.1538, "step": 96500 }, { "FLOPS loss": 0.07318626344203949, "L0_d": 607.73, "MLM loss": 2.035457134246826, "epoch": 2.07, "step": 96999 }, { "epoch": 2.07, "learning_rate": 8.225326530612245e-05, "loss": 2.1576, "step": 97000 }, { "FLOPS loss": 0.0791444182395935, "L0_d": 816.31, "MLM loss": 2.1670773029327393, "epoch": 2.08, "step": 97499 }, { "epoch": 2.08, "learning_rate": 8.215122448979591e-05, "loss": 2.1552, "step": 97500 }, { "FLOPS loss": 0.07561291009187698, "L0_d": 588.3, "MLM loss": 2.1358835697174072, "epoch": 2.09, "step": 97999 }, { "epoch": 2.09, "learning_rate": 8.204938775510205e-05, "loss": 2.1562, "step": 98000 }, { "FLOPS loss": 0.07940414547920227, "L0_d": 718.41, "MLM loss": 2.0383706092834473, "epoch": 2.1, "step": 98499 }, { "epoch": 2.1, "learning_rate": 8.194734693877551e-05, "loss": 2.1527, "step": 98500 }, { "FLOPS loss": 0.06504429876804352, "L0_d": 550.23, "MLM loss": 2.195014476776123, "epoch": 2.11, "step": 98999 }, { "epoch": 2.11, "learning_rate": 8.184530612244898e-05, "loss": 2.1497, "step": 99000 }, { "FLOPS loss": 0.069733627140522, "L0_d": 642.94, "MLM loss": 2.1821696758270264, "epoch": 2.12, "step": 99499 }, { "epoch": 2.12, "learning_rate": 8.174326530612246e-05, "loss": 2.1516, "step": 99500 }, { "FLOPS loss": 0.07867594063282013, "L0_d": 619.06, "MLM loss": 2.0050911903381348, "epoch": 2.13, "step": 99999 }, { "epoch": 2.13, "learning_rate": 8.164122448979592e-05, "loss": 2.1481, "step": 100000 }, { "FLOPS loss": 0.08644675463438034, "L0_d": 820.27, "MLM loss": 1.9595686197280884, "epoch": 2.14, "step": 100499 }, { "epoch": 2.14, "learning_rate": 8.153938775510205e-05, "loss": 2.148, "step": 100500 }, { "FLOPS loss": 0.061923496425151825, "L0_d": 575.0, "MLM loss": 2.031733989715576, "epoch": 2.15, "step": 100999 }, { "epoch": 2.15, "learning_rate": 8.143734693877552e-05, "loss": 2.1482, "step": 101000 }, { "FLOPS loss": 0.07247359305620193, "L0_d": 729.3, "MLM loss": 2.111258029937744, "epoch": 2.16, "step": 101499 }, { "epoch": 2.16, "learning_rate": 8.133530612244898e-05, "loss": 2.1423, "step": 101500 }, { "FLOPS loss": 0.07209628820419312, "L0_d": 757.38, "MLM loss": 2.086599826812744, "epoch": 2.17, "step": 101999 }, { "epoch": 2.17, "learning_rate": 8.123326530612245e-05, "loss": 2.142, "step": 102000 }, { "FLOPS loss": 0.07832463830709457, "L0_d": 702.0, "MLM loss": 2.0252575874328613, "epoch": 2.18, "step": 102499 }, { "epoch": 2.18, "learning_rate": 8.113142857142858e-05, "loss": 2.1446, "step": 102500 }, { "FLOPS loss": 0.07046867161989212, "L0_d": 741.03, "MLM loss": 2.137556552886963, "epoch": 2.19, "step": 102999 }, { "epoch": 2.19, "learning_rate": 8.102938775510205e-05, "loss": 2.1419, "step": 103000 }, { "FLOPS loss": 0.0920414999127388, "L0_d": 1184.77, "MLM loss": 2.0097923278808594, "epoch": 2.2, "step": 103499 }, { "epoch": 2.2, "learning_rate": 8.092734693877551e-05, "loss": 2.1431, "step": 103500 }, { "FLOPS loss": 0.06898413598537445, "L0_d": 748.45, "MLM loss": 2.123561382293701, "epoch": 2.21, "step": 103999 }, { "epoch": 2.21, "learning_rate": 8.082530612244899e-05, "loss": 2.1411, "step": 104000 }, { "FLOPS loss": 0.07652837038040161, "L0_d": 766.36, "MLM loss": 2.1538445949554443, "epoch": 2.23, "step": 104499 }, { "epoch": 2.23, "learning_rate": 8.07234693877551e-05, "loss": 2.1418, "step": 104500 }, { "FLOPS loss": 0.09778144955635071, "L0_d": 956.75, "MLM loss": 2.149035930633545, "epoch": 2.24, "step": 104999 }, { "epoch": 2.24, "learning_rate": 8.062142857142858e-05, "loss": 2.1406, "step": 105000 }, { "FLOPS loss": 0.07300714403390884, "L0_d": 606.64, "MLM loss": 2.056023597717285, "epoch": 2.25, "step": 105499 }, { "epoch": 2.25, "learning_rate": 8.051938775510205e-05, "loss": 2.1359, "step": 105500 }, { "FLOPS loss": 0.09272401034832001, "L0_d": 801.7, "MLM loss": 2.0067789554595947, "epoch": 2.26, "step": 105999 }, { "epoch": 2.26, "learning_rate": 8.041734693877552e-05, "loss": 2.1394, "step": 106000 }, { "FLOPS loss": 0.08801499009132385, "L0_d": 878.59, "MLM loss": 2.035496234893799, "epoch": 2.27, "step": 106499 }, { "epoch": 2.27, "learning_rate": 8.031551020408164e-05, "loss": 2.1345, "step": 106500 }, { "FLOPS loss": 0.08799153566360474, "L0_d": 653.61, "MLM loss": 2.0127854347229004, "epoch": 2.28, "step": 106999 }, { "epoch": 2.28, "learning_rate": 8.02134693877551e-05, "loss": 2.1346, "step": 107000 }, { "FLOPS loss": 0.06928052008152008, "L0_d": 940.64, "MLM loss": 2.041571617126465, "epoch": 2.29, "step": 107499 }, { "epoch": 2.29, "learning_rate": 8.011142857142857e-05, "loss": 2.1327, "step": 107500 }, { "FLOPS loss": 0.08908580243587494, "L0_d": 769.25, "MLM loss": 2.1204586029052734, "epoch": 2.3, "step": 107999 }, { "epoch": 2.3, "learning_rate": 8.000938775510205e-05, "loss": 2.1354, "step": 108000 }, { "FLOPS loss": 0.08444768190383911, "L0_d": 870.19, "MLM loss": 2.0592079162597656, "epoch": 2.31, "step": 108499 }, { "epoch": 2.31, "learning_rate": 7.990755102040817e-05, "loss": 2.1354, "step": 108500 }, { "FLOPS loss": 0.06808961927890778, "L0_d": 588.69, "MLM loss": 2.1283202171325684, "epoch": 2.32, "step": 108999 }, { "epoch": 2.32, "learning_rate": 7.980551020408163e-05, "loss": 2.1278, "step": 109000 }, { "FLOPS loss": 0.07635576277971268, "L0_d": 898.77, "MLM loss": 1.9690206050872803, "epoch": 2.33, "step": 109499 }, { "epoch": 2.33, "learning_rate": 7.97034693877551e-05, "loss": 2.1302, "step": 109500 }, { "FLOPS loss": 0.08134192228317261, "L0_d": 714.91, "MLM loss": 2.120096445083618, "epoch": 2.34, "step": 109999 }, { "epoch": 2.34, "learning_rate": 7.960142857142858e-05, "loss": 2.1279, "step": 110000 }, { "FLOPS loss": 0.07751913368701935, "L0_d": 571.03, "MLM loss": 2.038954496383667, "epoch": 2.35, "step": 110499 }, { "epoch": 2.35, "learning_rate": 7.94995918367347e-05, "loss": 2.1268, "step": 110500 }, { "FLOPS loss": 0.07746893912553787, "L0_d": 878.41, "MLM loss": 2.1648635864257812, "epoch": 2.36, "step": 110999 }, { "epoch": 2.36, "learning_rate": 7.939755102040816e-05, "loss": 2.1271, "step": 111000 }, { "FLOPS loss": 0.07608819752931595, "L0_d": 597.73, "MLM loss": 1.9035899639129639, "epoch": 2.37, "step": 111499 }, { "epoch": 2.37, "learning_rate": 7.929551020408164e-05, "loss": 2.1277, "step": 111500 }, { "FLOPS loss": 0.07868288457393646, "L0_d": 842.05, "MLM loss": 2.2060461044311523, "epoch": 2.39, "step": 111999 }, { "epoch": 2.39, "learning_rate": 7.91934693877551e-05, "loss": 2.1268, "step": 112000 }, { "FLOPS loss": 0.06169697642326355, "L0_d": 662.14, "MLM loss": 2.0528273582458496, "epoch": 2.4, "step": 112499 }, { "epoch": 2.4, "learning_rate": 7.909163265306124e-05, "loss": 2.1269, "step": 112500 }, { "FLOPS loss": 0.07529112696647644, "L0_d": 815.22, "MLM loss": 2.0095815658569336, "epoch": 2.41, "step": 112999 }, { "epoch": 2.41, "learning_rate": 7.89895918367347e-05, "loss": 2.1241, "step": 113000 }, { "FLOPS loss": 0.0667743906378746, "L0_d": 679.59, "MLM loss": 2.011075258255005, "epoch": 2.42, "step": 113499 }, { "epoch": 2.42, "learning_rate": 7.888755102040817e-05, "loss": 2.1167, "step": 113500 }, { "FLOPS loss": 0.08264929056167603, "L0_d": 838.69, "MLM loss": 1.9273202419281006, "epoch": 2.43, "step": 113999 }, { "epoch": 2.43, "learning_rate": 7.878551020408163e-05, "loss": 2.1225, "step": 114000 }, { "FLOPS loss": 0.09935013949871063, "L0_d": 979.97, "MLM loss": 1.986405611038208, "epoch": 2.44, "step": 114499 }, { "epoch": 2.44, "learning_rate": 7.868367346938777e-05, "loss": 2.1164, "step": 114500 }, { "FLOPS loss": 0.09770844131708145, "L0_d": 787.3, "MLM loss": 2.0416669845581055, "epoch": 2.45, "step": 114999 }, { "epoch": 2.45, "learning_rate": 7.858183673469389e-05, "loss": 2.121, "step": 115000 }, { "FLOPS loss": 0.05972149968147278, "L0_d": 543.11, "MLM loss": 2.0023961067199707, "epoch": 2.46, "step": 115499 }, { "epoch": 2.46, "learning_rate": 7.847979591836735e-05, "loss": 2.1165, "step": 115500 }, { "FLOPS loss": 0.08709295839071274, "L0_d": 732.72, "MLM loss": 2.027191638946533, "epoch": 2.47, "step": 115999 }, { "epoch": 2.47, "learning_rate": 7.837775510204082e-05, "loss": 2.1156, "step": 116000 }, { "FLOPS loss": 0.08640507608652115, "L0_d": 845.5, "MLM loss": 2.0632519721984863, "epoch": 2.48, "step": 116499 }, { "epoch": 2.48, "learning_rate": 7.827571428571428e-05, "loss": 2.1191, "step": 116500 }, { "FLOPS loss": 0.08174131065607071, "L0_d": 931.69, "MLM loss": 1.9130254983901978, "epoch": 2.49, "step": 116999 }, { "epoch": 2.49, "learning_rate": 7.817367346938776e-05, "loss": 2.1117, "step": 117000 }, { "FLOPS loss": 0.08186709880828857, "L0_d": 724.42, "MLM loss": 2.023085117340088, "epoch": 2.5, "step": 117499 }, { "epoch": 2.5, "learning_rate": 7.807163265306124e-05, "loss": 2.1155, "step": 117500 }, { "FLOPS loss": 0.08723706007003784, "L0_d": 1007.92, "MLM loss": 2.0281600952148438, "epoch": 2.51, "step": 117999 }, { "epoch": 2.51, "learning_rate": 7.79695918367347e-05, "loss": 2.1153, "step": 118000 }, { "FLOPS loss": 0.07498479634523392, "L0_d": 667.45, "MLM loss": 1.9852380752563477, "epoch": 2.52, "step": 118499 }, { "epoch": 2.52, "learning_rate": 7.786755102040817e-05, "loss": 2.1133, "step": 118500 }, { "FLOPS loss": 0.10051379352807999, "L0_d": 1041.19, "MLM loss": 1.9548364877700806, "epoch": 2.53, "step": 118999 }, { "epoch": 2.53, "learning_rate": 7.776551020408163e-05, "loss": 2.1133, "step": 119000 }, { "FLOPS loss": 0.08071957528591156, "L0_d": 773.66, "MLM loss": 2.1546220779418945, "epoch": 2.54, "step": 119499 }, { "epoch": 2.54, "learning_rate": 7.766367346938775e-05, "loss": 2.1142, "step": 119500 }, { "FLOPS loss": 0.08354714512825012, "L0_d": 889.59, "MLM loss": 1.9811208248138428, "epoch": 2.56, "step": 119999 }, { "epoch": 2.56, "learning_rate": 7.756163265306123e-05, "loss": 2.1146, "step": 120000 }, { "FLOPS loss": 0.07952099293470383, "L0_d": 512.33, "MLM loss": 2.0295233726501465, "epoch": 2.57, "step": 120499 }, { "epoch": 2.57, "learning_rate": 7.74595918367347e-05, "loss": 2.1118, "step": 120500 }, { "FLOPS loss": 0.07554259896278381, "L0_d": 694.92, "MLM loss": 1.9999445676803589, "epoch": 2.58, "step": 120999 }, { "epoch": 2.58, "learning_rate": 7.735755102040817e-05, "loss": 2.1113, "step": 121000 }, { "FLOPS loss": 0.07274837791919708, "L0_d": 696.59, "MLM loss": 2.136122465133667, "epoch": 2.59, "step": 121499 }, { "epoch": 2.59, "learning_rate": 7.725571428571428e-05, "loss": 2.108, "step": 121500 }, { "FLOPS loss": 0.08334198594093323, "L0_d": 939.45, "MLM loss": 2.0925543308258057, "epoch": 2.6, "step": 121999 }, { "epoch": 2.6, "learning_rate": 7.715367346938776e-05, "loss": 2.1041, "step": 122000 }, { "FLOPS loss": 0.07802562415599823, "L0_d": 719.03, "MLM loss": 1.9580026865005493, "epoch": 2.61, "step": 122499 }, { "epoch": 2.61, "learning_rate": 7.705163265306122e-05, "loss": 2.1062, "step": 122500 }, { "FLOPS loss": 0.07663872092962265, "L0_d": 767.58, "MLM loss": 2.0476346015930176, "epoch": 2.62, "step": 122999 }, { "epoch": 2.62, "learning_rate": 7.694979591836736e-05, "loss": 2.1065, "step": 123000 }, { "FLOPS loss": 0.07221131026744843, "L0_d": 614.86, "MLM loss": 2.0577306747436523, "epoch": 2.63, "step": 123499 }, { "epoch": 2.63, "learning_rate": 7.684775510204082e-05, "loss": 2.1038, "step": 123500 }, { "FLOPS loss": 0.07997844368219376, "L0_d": 788.98, "MLM loss": 1.9502489566802979, "epoch": 2.64, "step": 123999 }, { "epoch": 2.64, "learning_rate": 7.674571428571429e-05, "loss": 2.1017, "step": 124000 }, { "FLOPS loss": 0.07191063463687897, "L0_d": 812.03, "MLM loss": 1.900945782661438, "epoch": 2.65, "step": 124499 }, { "epoch": 2.65, "learning_rate": 7.664367346938775e-05, "loss": 2.1041, "step": 124500 }, { "FLOPS loss": 0.07377645373344421, "L0_d": 741.73, "MLM loss": 1.984440803527832, "epoch": 2.66, "step": 124999 }, { "epoch": 2.66, "learning_rate": 7.654163265306123e-05, "loss": 2.1027, "step": 125000 }, { "FLOPS loss": 0.07682693749666214, "L0_d": 721.17, "MLM loss": 2.003563165664673, "epoch": 2.67, "step": 125499 }, { "epoch": 2.67, "learning_rate": 7.643959183673471e-05, "loss": 2.1, "step": 125500 }, { "FLOPS loss": 0.08019888401031494, "L0_d": 720.73, "MLM loss": 2.0952367782592773, "epoch": 2.68, "step": 125999 }, { "epoch": 2.68, "learning_rate": 7.633755102040817e-05, "loss": 2.1036, "step": 126000 }, { "FLOPS loss": 0.08629703521728516, "L0_d": 741.38, "MLM loss": 1.9909753799438477, "epoch": 2.69, "step": 126499 }, { "epoch": 2.69, "learning_rate": 7.623551020408164e-05, "loss": 2.1026, "step": 126500 }, { "FLOPS loss": 0.0715538039803505, "L0_d": 745.28, "MLM loss": 2.0442450046539307, "epoch": 2.7, "step": 126999 }, { "epoch": 2.7, "learning_rate": 7.613367346938776e-05, "loss": 2.1007, "step": 127000 }, { "FLOPS loss": 0.06486863642930984, "L0_d": 602.3, "MLM loss": 2.0304067134857178, "epoch": 2.72, "step": 127499 }, { "epoch": 2.72, "learning_rate": 7.603163265306122e-05, "loss": 2.0963, "step": 127500 }, { "FLOPS loss": 0.07741992175579071, "L0_d": 633.12, "MLM loss": 2.092017650604248, "epoch": 2.73, "step": 127999 }, { "epoch": 2.73, "learning_rate": 7.592959183673469e-05, "loss": 2.0985, "step": 128000 }, { "FLOPS loss": 0.055709708482027054, "L0_d": 678.59, "MLM loss": 1.9564106464385986, "epoch": 2.74, "step": 128499 }, { "epoch": 2.74, "learning_rate": 7.582755102040817e-05, "loss": 2.0985, "step": 128500 }, { "FLOPS loss": 0.08097654581069946, "L0_d": 947.98, "MLM loss": 2.071964740753174, "epoch": 2.75, "step": 128999 }, { "epoch": 2.75, "learning_rate": 7.572571428571429e-05, "loss": 2.098, "step": 129000 }, { "FLOPS loss": 0.08121536672115326, "L0_d": 914.73, "MLM loss": 1.9944313764572144, "epoch": 2.76, "step": 129499 }, { "epoch": 2.76, "learning_rate": 7.562367346938775e-05, "loss": 2.092, "step": 129500 }, { "FLOPS loss": 0.07965188473463058, "L0_d": 735.81, "MLM loss": 2.1375985145568848, "epoch": 2.77, "step": 129999 }, { "epoch": 2.77, "learning_rate": 7.552163265306123e-05, "loss": 2.0923, "step": 130000 }, { "FLOPS loss": 0.06054941192269325, "L0_d": 546.17, "MLM loss": 2.0754427909851074, "epoch": 2.78, "step": 130499 }, { "epoch": 2.78, "learning_rate": 7.54195918367347e-05, "loss": 2.0946, "step": 130500 }, { "FLOPS loss": 0.07722670584917068, "L0_d": 1042.67, "MLM loss": 2.0651721954345703, "epoch": 2.79, "step": 130999 }, { "epoch": 2.79, "learning_rate": 7.531775510204082e-05, "loss": 2.0917, "step": 131000 }, { "FLOPS loss": 0.08554383367300034, "L0_d": 919.33, "MLM loss": 2.1933531761169434, "epoch": 2.8, "step": 131499 }, { "epoch": 2.8, "learning_rate": 7.52157142857143e-05, "loss": 2.0932, "step": 131500 }, { "FLOPS loss": 0.09546613693237305, "L0_d": 709.77, "MLM loss": 2.0176644325256348, "epoch": 2.81, "step": 131999 }, { "epoch": 2.81, "learning_rate": 7.511367346938776e-05, "loss": 2.0914, "step": 132000 }, { "FLOPS loss": 0.08541066944599152, "L0_d": 823.94, "MLM loss": 2.066415786743164, "epoch": 2.82, "step": 132499 }, { "epoch": 2.82, "learning_rate": 7.501163265306122e-05, "loss": 2.0921, "step": 132500 }, { "FLOPS loss": 0.08676711469888687, "L0_d": 714.61, "MLM loss": 2.1011126041412354, "epoch": 2.83, "step": 132999 }, { "epoch": 2.83, "learning_rate": 7.490979591836736e-05, "loss": 2.0941, "step": 133000 }, { "FLOPS loss": 0.06921195238828659, "L0_d": 802.39, "MLM loss": 1.955210566520691, "epoch": 2.84, "step": 133499 }, { "epoch": 2.84, "learning_rate": 7.480775510204082e-05, "loss": 2.0922, "step": 133500 }, { "FLOPS loss": 0.07848220318555832, "L0_d": 554.2, "MLM loss": 2.0789244174957275, "epoch": 2.85, "step": 133999 }, { "epoch": 2.85, "learning_rate": 7.470571428571429e-05, "loss": 2.0911, "step": 134000 }, { "FLOPS loss": 0.08104442059993744, "L0_d": 967.77, "MLM loss": 2.0796117782592773, "epoch": 2.86, "step": 134499 }, { "epoch": 2.86, "learning_rate": 7.460367346938776e-05, "loss": 2.0915, "step": 134500 }, { "FLOPS loss": 0.08678069710731506, "L0_d": 963.12, "MLM loss": 2.073706865310669, "epoch": 2.87, "step": 134999 }, { "epoch": 2.87, "learning_rate": 7.450183673469389e-05, "loss": 2.0891, "step": 135000 }, { "FLOPS loss": 0.07000401616096497, "L0_d": 784.11, "MLM loss": 1.962247610092163, "epoch": 2.89, "step": 135499 }, { "epoch": 2.89, "learning_rate": 7.439979591836735e-05, "loss": 2.0861, "step": 135500 }, { "FLOPS loss": 0.06950665265321732, "L0_d": 535.03, "MLM loss": 2.1357204914093018, "epoch": 2.9, "step": 135999 }, { "epoch": 2.9, "learning_rate": 7.429795918367347e-05, "loss": 2.0883, "step": 136000 }, { "FLOPS loss": 0.08367476612329483, "L0_d": 646.3, "MLM loss": 1.8792662620544434, "epoch": 2.91, "step": 136499 }, { "epoch": 2.91, "learning_rate": 7.419591836734694e-05, "loss": 2.0846, "step": 136500 }, { "FLOPS loss": 0.06122388690710068, "L0_d": 699.14, "MLM loss": 1.9720458984375, "epoch": 2.92, "step": 136999 }, { "epoch": 2.92, "learning_rate": 7.409387755102041e-05, "loss": 2.0875, "step": 137000 }, { "FLOPS loss": 0.0852053165435791, "L0_d": 877.64, "MLM loss": 1.9794983863830566, "epoch": 2.93, "step": 137499 }, { "epoch": 2.93, "learning_rate": 7.399183673469388e-05, "loss": 2.0881, "step": 137500 }, { "FLOPS loss": 0.09250415861606598, "L0_d": 736.42, "MLM loss": 2.1271352767944336, "epoch": 2.94, "step": 137999 }, { "epoch": 2.94, "learning_rate": 7.388979591836736e-05, "loss": 2.0844, "step": 138000 }, { "FLOPS loss": 0.09295733273029327, "L0_d": 944.11, "MLM loss": 1.9891127347946167, "epoch": 2.95, "step": 138499 }, { "epoch": 2.95, "learning_rate": 7.378775510204082e-05, "loss": 2.083, "step": 138500 }, { "FLOPS loss": 0.069969043135643, "L0_d": 701.23, "MLM loss": 1.936417818069458, "epoch": 2.96, "step": 138999 }, { "epoch": 2.96, "learning_rate": 7.368571428571429e-05, "loss": 2.0795, "step": 139000 }, { "FLOPS loss": 0.06195145100355148, "L0_d": 546.84, "MLM loss": 1.9803211688995361, "epoch": 2.97, "step": 139499 }, { "epoch": 2.97, "learning_rate": 7.358367346938776e-05, "loss": 2.083, "step": 139500 }, { "FLOPS loss": 0.07750408351421356, "L0_d": 571.36, "MLM loss": 1.9305696487426758, "epoch": 2.98, "step": 139999 }, { "epoch": 2.98, "learning_rate": 7.348183673469387e-05, "loss": 2.0841, "step": 140000 }, { "FLOPS loss": 0.08334793150424957, "L0_d": 848.39, "MLM loss": 1.814992904663086, "epoch": 2.99, "step": 140499 }, { "epoch": 2.99, "learning_rate": 7.337979591836735e-05, "loss": 2.0783, "step": 140500 }, { "FLOPS loss": 0.07291968166828156, "L0_d": 730.91, "MLM loss": 2.0424416065216064, "epoch": 3.0, "step": 140999 }, { "epoch": 3.0, "learning_rate": 7.327775510204083e-05, "loss": 2.0796, "step": 141000 }, { "FLOPS loss": 0.08008062839508057, "L0_d": 827.06, "MLM loss": 2.070688486099243, "epoch": 3.01, "step": 141499 }, { "epoch": 3.01, "learning_rate": 7.317571428571429e-05, "loss": 2.0768, "step": 141500 }, { "FLOPS loss": 0.07891778647899628, "L0_d": 1120.58, "MLM loss": 2.0585923194885254, "epoch": 3.02, "step": 141999 }, { "epoch": 3.02, "learning_rate": 7.307387755102041e-05, "loss": 2.0781, "step": 142000 }, { "FLOPS loss": 0.08013079315423965, "L0_d": 939.69, "MLM loss": 2.0557148456573486, "epoch": 3.03, "step": 142499 }, { "epoch": 3.03, "learning_rate": 7.297183673469388e-05, "loss": 2.0711, "step": 142500 }, { "FLOPS loss": 0.07039299607276917, "L0_d": 884.61, "MLM loss": 2.26658034324646, "epoch": 3.05, "step": 142999 }, { "epoch": 3.05, "learning_rate": 7.286979591836734e-05, "loss": 2.0762, "step": 143000 }, { "FLOPS loss": 0.08341009169816971, "L0_d": 1099.41, "MLM loss": 1.9736123085021973, "epoch": 3.06, "step": 143499 }, { "epoch": 3.06, "learning_rate": 7.276775510204082e-05, "loss": 2.0726, "step": 143500 }, { "FLOPS loss": 0.07099974900484085, "L0_d": 677.59, "MLM loss": 1.942285180091858, "epoch": 3.07, "step": 143999 }, { "epoch": 3.07, "learning_rate": 7.26657142857143e-05, "loss": 2.0704, "step": 144000 }, { "FLOPS loss": 0.07856574654579163, "L0_d": 963.98, "MLM loss": 1.9899259805679321, "epoch": 3.08, "step": 144499 }, { "epoch": 3.08, "learning_rate": 7.256367346938776e-05, "loss": 2.0739, "step": 144500 }, { "FLOPS loss": 0.07401182502508163, "L0_d": 681.78, "MLM loss": 1.916780948638916, "epoch": 3.09, "step": 144999 }, { "epoch": 3.09, "learning_rate": 7.246183673469387e-05, "loss": 2.0708, "step": 145000 }, { "FLOPS loss": 0.09351439774036407, "L0_d": 556.44, "MLM loss": 2.0708470344543457, "epoch": 3.1, "step": 145499 }, { "epoch": 3.1, "learning_rate": 7.235979591836735e-05, "loss": 2.0742, "step": 145500 }, { "FLOPS loss": 0.07324228435754776, "L0_d": 875.48, "MLM loss": 2.053828477859497, "epoch": 3.11, "step": 145999 }, { "epoch": 3.11, "learning_rate": 7.225775510204083e-05, "loss": 2.0709, "step": 146000 }, { "FLOPS loss": 0.08776471018791199, "L0_d": 757.81, "MLM loss": 2.021151065826416, "epoch": 3.12, "step": 146499 }, { "epoch": 3.12, "learning_rate": 7.215571428571429e-05, "loss": 2.0748, "step": 146500 }, { "FLOPS loss": 0.07924683392047882, "L0_d": 737.95, "MLM loss": 2.0102930068969727, "epoch": 3.13, "step": 146999 }, { "epoch": 3.13, "learning_rate": 7.205367346938776e-05, "loss": 2.0692, "step": 147000 }, { "FLOPS loss": 0.07468462735414505, "L0_d": 579.72, "MLM loss": 2.31913685798645, "epoch": 3.14, "step": 147499 }, { "epoch": 3.14, "learning_rate": 7.195183673469388e-05, "loss": 2.0714, "step": 147500 }, { "FLOPS loss": 0.07593423873186111, "L0_d": 786.03, "MLM loss": 1.9522225856781006, "epoch": 3.15, "step": 147999 }, { "epoch": 3.15, "learning_rate": 7.184979591836734e-05, "loss": 2.0678, "step": 148000 }, { "FLOPS loss": 0.08743783086538315, "L0_d": 833.62, "MLM loss": 2.035630702972412, "epoch": 3.16, "step": 148499 }, { "epoch": 3.16, "learning_rate": 7.174775510204082e-05, "loss": 2.0673, "step": 148500 }, { "FLOPS loss": 0.06403844803571701, "L0_d": 583.55, "MLM loss": 1.965914011001587, "epoch": 3.17, "step": 148999 }, { "epoch": 3.17, "learning_rate": 7.16457142857143e-05, "loss": 2.0652, "step": 149000 }, { "FLOPS loss": 0.07395175844430923, "L0_d": 928.45, "MLM loss": 1.829352855682373, "epoch": 3.18, "step": 149499 }, { "epoch": 3.18, "learning_rate": 7.154367346938776e-05, "loss": 2.067, "step": 149500 }, { "FLOPS loss": 0.07722048461437225, "L0_d": 903.33, "MLM loss": 1.9278677701950073, "epoch": 3.19, "step": 149999 }, { "epoch": 3.19, "learning_rate": 7.144183673469388e-05, "loss": 2.0659, "step": 150000 }, { "FLOPS loss": 0.072312131524086, "L0_d": 516.84, "MLM loss": 1.9207533597946167, "epoch": 3.2, "step": 150499 }, { "epoch": 3.2, "learning_rate": 7.133979591836735e-05, "loss": 2.0657, "step": 150500 }, { "FLOPS loss": 0.10924256592988968, "L0_d": 1259.34, "MLM loss": 1.9205459356307983, "epoch": 3.22, "step": 150999 }, { "epoch": 3.22, "learning_rate": 7.123775510204081e-05, "loss": 2.0636, "step": 151000 }, { "FLOPS loss": 0.0764654129743576, "L0_d": 890.97, "MLM loss": 2.1768999099731445, "epoch": 3.23, "step": 151499 }, { "epoch": 3.23, "learning_rate": 7.113571428571429e-05, "loss": 2.0645, "step": 151500 }, { "FLOPS loss": 0.07931890338659286, "L0_d": 1040.11, "MLM loss": 1.8496606349945068, "epoch": 3.24, "step": 151999 }, { "epoch": 3.24, "learning_rate": 7.103367346938776e-05, "loss": 2.0592, "step": 152000 }, { "FLOPS loss": 0.0654386356472969, "L0_d": 611.86, "MLM loss": 1.8984403610229492, "epoch": 3.25, "step": 152499 }, { "epoch": 3.25, "learning_rate": 7.093183673469388e-05, "loss": 2.0637, "step": 152500 }, { "FLOPS loss": 0.07457588613033295, "L0_d": 571.36, "MLM loss": 1.9137520790100098, "epoch": 3.26, "step": 152999 }, { "epoch": 3.26, "learning_rate": 7.082979591836734e-05, "loss": 2.06, "step": 153000 }, { "FLOPS loss": 0.09647884219884872, "L0_d": 1122.12, "MLM loss": 2.0544707775115967, "epoch": 3.27, "step": 153499 }, { "epoch": 3.27, "learning_rate": 7.072775510204082e-05, "loss": 2.0605, "step": 153500 }, { "FLOPS loss": 0.08411688357591629, "L0_d": 904.66, "MLM loss": 1.8506371974945068, "epoch": 3.28, "step": 153999 }, { "epoch": 3.28, "learning_rate": 7.062571428571428e-05, "loss": 2.0626, "step": 154000 }, { "FLOPS loss": 0.08170817792415619, "L0_d": 845.39, "MLM loss": 2.004265069961548, "epoch": 3.29, "step": 154499 }, { "epoch": 3.29, "learning_rate": 7.05238775510204e-05, "loss": 2.0625, "step": 154500 }, { "FLOPS loss": 0.08299441635608673, "L0_d": 780.56, "MLM loss": 2.034635543823242, "epoch": 3.3, "step": 154999 }, { "epoch": 3.3, "learning_rate": 7.042183673469388e-05, "loss": 2.0619, "step": 155000 }, { "FLOPS loss": 0.07230028510093689, "L0_d": 614.62, "MLM loss": 1.9387309551239014, "epoch": 3.31, "step": 155499 }, { "epoch": 3.31, "learning_rate": 7.031979591836735e-05, "loss": 2.0595, "step": 155500 }, { "FLOPS loss": 0.06355029344558716, "L0_d": 695.94, "MLM loss": 1.9958910942077637, "epoch": 3.32, "step": 155999 }, { "epoch": 3.32, "learning_rate": 7.021775510204081e-05, "loss": 2.0613, "step": 156000 }, { "FLOPS loss": 0.07059838622808456, "L0_d": 713.44, "MLM loss": 1.919121503829956, "epoch": 3.33, "step": 156499 }, { "epoch": 3.33, "learning_rate": 7.011571428571429e-05, "loss": 2.0596, "step": 156500 }, { "FLOPS loss": 0.07795380800962448, "L0_d": 713.16, "MLM loss": 1.9082648754119873, "epoch": 3.34, "step": 156999 }, { "epoch": 3.34, "learning_rate": 7.001387755102041e-05, "loss": 2.0574, "step": 157000 }, { "FLOPS loss": 0.08485406637191772, "L0_d": 786.42, "MLM loss": 1.8752378225326538, "epoch": 3.35, "step": 157499 }, { "epoch": 3.35, "learning_rate": 6.991183673469388e-05, "loss": 2.0575, "step": 157500 }, { "FLOPS loss": 0.08777129650115967, "L0_d": 797.16, "MLM loss": 2.156707286834717, "epoch": 3.36, "step": 157999 }, { "epoch": 3.36, "learning_rate": 6.980979591836735e-05, "loss": 2.0576, "step": 158000 }, { "FLOPS loss": 0.07295342534780502, "L0_d": 668.17, "MLM loss": 1.9450191259384155, "epoch": 3.38, "step": 158499 }, { "epoch": 3.38, "learning_rate": 6.970775510204082e-05, "loss": 2.059, "step": 158500 }, { "FLOPS loss": 0.08175527304410934, "L0_d": 751.41, "MLM loss": 2.059969186782837, "epoch": 3.39, "step": 158999 }, { "epoch": 3.39, "learning_rate": 6.960591836734694e-05, "loss": 2.0571, "step": 159000 }, { "FLOPS loss": 0.09450782090425491, "L0_d": 1096.31, "MLM loss": 1.8040263652801514, "epoch": 3.4, "step": 159499 }, { "epoch": 3.4, "learning_rate": 6.950387755102042e-05, "loss": 2.0554, "step": 159500 }, { "FLOPS loss": 0.08854455500841141, "L0_d": 894.12, "MLM loss": 1.8809754848480225, "epoch": 3.41, "step": 159999 }, { "epoch": 3.41, "learning_rate": 6.940183673469388e-05, "loss": 2.0546, "step": 160000 }, { "FLOPS loss": 0.07546830177307129, "L0_d": 837.89, "MLM loss": 2.0773215293884277, "epoch": 3.42, "step": 160499 }, { "epoch": 3.42, "learning_rate": 6.929979591836735e-05, "loss": 2.0534, "step": 160500 }, { "FLOPS loss": 0.06834632158279419, "L0_d": 618.45, "MLM loss": 1.9398113489151, "epoch": 3.43, "step": 160999 }, { "epoch": 3.43, "learning_rate": 6.919775510204081e-05, "loss": 2.0519, "step": 161000 }, { "FLOPS loss": 0.08442704379558563, "L0_d": 801.73, "MLM loss": 1.9613676071166992, "epoch": 3.44, "step": 161499 }, { "epoch": 3.44, "learning_rate": 6.909591836734695e-05, "loss": 2.0518, "step": 161500 }, { "FLOPS loss": 0.08665428310632706, "L0_d": 717.11, "MLM loss": 2.046541213989258, "epoch": 3.45, "step": 161999 }, { "epoch": 3.45, "learning_rate": 6.899387755102041e-05, "loss": 2.0489, "step": 162000 }, { "FLOPS loss": 0.08299516886472702, "L0_d": 726.27, "MLM loss": 1.958557367324829, "epoch": 3.46, "step": 162499 }, { "epoch": 3.46, "learning_rate": 6.889183673469388e-05, "loss": 2.0539, "step": 162500 }, { "FLOPS loss": 0.08221839368343353, "L0_d": 634.72, "MLM loss": 1.9704405069351196, "epoch": 3.47, "step": 162999 }, { "epoch": 3.47, "learning_rate": 6.878979591836735e-05, "loss": 2.0522, "step": 163000 }, { "FLOPS loss": 0.0814078077673912, "L0_d": 800.78, "MLM loss": 1.8931515216827393, "epoch": 3.48, "step": 163499 }, { "epoch": 3.48, "learning_rate": 6.868795918367348e-05, "loss": 2.0502, "step": 163500 }, { "FLOPS loss": 0.08102941513061523, "L0_d": 779.53, "MLM loss": 1.9019794464111328, "epoch": 3.49, "step": 163999 }, { "epoch": 3.49, "learning_rate": 6.858591836734694e-05, "loss": 2.0502, "step": 164000 }, { "FLOPS loss": 0.07698705792427063, "L0_d": 880.31, "MLM loss": 2.169793128967285, "epoch": 3.5, "step": 164499 }, { "epoch": 3.5, "learning_rate": 6.848387755102042e-05, "loss": 2.0495, "step": 164500 }, { "FLOPS loss": 0.07340797036886215, "L0_d": 836.19, "MLM loss": 2.1057276725769043, "epoch": 3.51, "step": 164999 }, { "epoch": 3.51, "learning_rate": 6.838183673469388e-05, "loss": 2.0522, "step": 165000 }, { "FLOPS loss": 0.06688041239976883, "L0_d": 516.97, "MLM loss": 1.8922147750854492, "epoch": 3.52, "step": 165499 }, { "epoch": 3.52, "learning_rate": 6.827979591836735e-05, "loss": 2.0489, "step": 165500 }, { "FLOPS loss": 0.07872175425291061, "L0_d": 844.69, "MLM loss": 2.0002338886260986, "epoch": 3.54, "step": 165999 }, { "epoch": 3.54, "learning_rate": 6.817795918367347e-05, "loss": 2.0477, "step": 166000 }, { "FLOPS loss": 0.06916598230600357, "L0_d": 696.31, "MLM loss": 1.8614131212234497, "epoch": 3.55, "step": 166499 }, { "epoch": 3.55, "learning_rate": 6.807591836734695e-05, "loss": 2.0469, "step": 166500 }, { "FLOPS loss": 0.08415335416793823, "L0_d": 721.41, "MLM loss": 2.031548500061035, "epoch": 3.56, "step": 166999 }, { "epoch": 3.56, "learning_rate": 6.797387755102041e-05, "loss": 2.0474, "step": 167000 }, { "FLOPS loss": 0.06985273212194443, "L0_d": 651.58, "MLM loss": 1.9898622035980225, "epoch": 3.57, "step": 167499 }, { "epoch": 3.57, "learning_rate": 6.787183673469389e-05, "loss": 2.0465, "step": 167500 }, { "FLOPS loss": 0.07171157002449036, "L0_d": 630.3, "MLM loss": 2.079214096069336, "epoch": 3.58, "step": 167999 }, { "epoch": 3.58, "learning_rate": 6.776979591836735e-05, "loss": 2.0488, "step": 168000 }, { "FLOPS loss": 0.10304520279169083, "L0_d": 889.3, "MLM loss": 1.9510843753814697, "epoch": 3.59, "step": 168499 }, { "epoch": 3.59, "learning_rate": 6.766775510204082e-05, "loss": 2.0445, "step": 168500 }, { "FLOPS loss": 0.07319284975528717, "L0_d": 823.53, "MLM loss": 1.9499073028564453, "epoch": 3.6, "step": 168999 }, { "epoch": 3.6, "learning_rate": 6.756591836734694e-05, "loss": 2.0458, "step": 169000 }, { "FLOPS loss": 0.07182403653860092, "L0_d": 741.36, "MLM loss": 1.8717212677001953, "epoch": 3.61, "step": 169499 }, { "epoch": 3.61, "learning_rate": 6.746387755102042e-05, "loss": 2.0428, "step": 169500 }, { "FLOPS loss": 0.08154419809579849, "L0_d": 823.67, "MLM loss": 1.9579507112503052, "epoch": 3.62, "step": 169999 }, { "epoch": 3.62, "learning_rate": 6.736183673469388e-05, "loss": 2.0461, "step": 170000 }, { "FLOPS loss": 0.07501042634248734, "L0_d": 733.5, "MLM loss": 1.9381325244903564, "epoch": 3.63, "step": 170499 }, { "epoch": 3.63, "learning_rate": 6.725979591836735e-05, "loss": 2.0417, "step": 170500 }, { "FLOPS loss": 0.07502731680870056, "L0_d": 756.14, "MLM loss": 2.078585624694824, "epoch": 3.64, "step": 170999 }, { "epoch": 3.64, "learning_rate": 6.715775510204083e-05, "loss": 2.0407, "step": 171000 }, { "FLOPS loss": 0.07120244204998016, "L0_d": 591.41, "MLM loss": 2.0549476146698, "epoch": 3.65, "step": 171499 }, { "epoch": 3.65, "learning_rate": 6.705571428571429e-05, "loss": 2.0434, "step": 171500 }, { "FLOPS loss": 0.08011957257986069, "L0_d": 699.62, "MLM loss": 1.9056050777435303, "epoch": 3.66, "step": 171999 }, { "epoch": 3.66, "learning_rate": 6.695367346938775e-05, "loss": 2.0408, "step": 172000 }, { "FLOPS loss": 0.09575420618057251, "L0_d": 1030.78, "MLM loss": 1.995476484298706, "epoch": 3.67, "step": 172499 }, { "epoch": 3.67, "learning_rate": 6.685183673469389e-05, "loss": 2.043, "step": 172500 }, { "FLOPS loss": 0.08612006902694702, "L0_d": 676.3, "MLM loss": 2.0726420879364014, "epoch": 3.68, "step": 172999 }, { "epoch": 3.68, "learning_rate": 6.674979591836735e-05, "loss": 2.0417, "step": 173000 }, { "FLOPS loss": 0.06685136258602142, "L0_d": 1046.97, "MLM loss": 1.9004302024841309, "epoch": 3.69, "step": 173499 }, { "epoch": 3.69, "learning_rate": 6.664775510204082e-05, "loss": 2.0411, "step": 173500 }, { "FLOPS loss": 0.0729975476861, "L0_d": 785.67, "MLM loss": 2.033959150314331, "epoch": 3.71, "step": 173999 }, { "epoch": 3.71, "learning_rate": 6.654571428571428e-05, "loss": 2.0404, "step": 174000 }, { "FLOPS loss": 0.07711607962846756, "L0_d": 713.72, "MLM loss": 1.8837366104125977, "epoch": 3.72, "step": 174499 }, { "epoch": 3.72, "learning_rate": 6.644367346938776e-05, "loss": 2.0394, "step": 174500 }, { "FLOPS loss": 0.07960330694913864, "L0_d": 759.89, "MLM loss": 1.871514081954956, "epoch": 3.73, "step": 174999 }, { "epoch": 3.73, "learning_rate": 6.634183673469388e-05, "loss": 2.0399, "step": 175000 }, { "FLOPS loss": 0.08591853827238083, "L0_d": 1124.56, "MLM loss": 1.882832646369934, "epoch": 3.74, "step": 175499 }, { "epoch": 3.74, "learning_rate": 6.623979591836735e-05, "loss": 2.0366, "step": 175500 }, { "FLOPS loss": 0.06938300281763077, "L0_d": 552.39, "MLM loss": 2.0231130123138428, "epoch": 3.75, "step": 175999 }, { "epoch": 3.75, "learning_rate": 6.613775510204083e-05, "loss": 2.0372, "step": 176000 }, { "FLOPS loss": 0.06626979261636734, "L0_d": 547.67, "MLM loss": 1.9859817028045654, "epoch": 3.76, "step": 176499 }, { "epoch": 3.76, "learning_rate": 6.603571428571429e-05, "loss": 2.0393, "step": 176500 }, { "FLOPS loss": 0.08292733132839203, "L0_d": 876.09, "MLM loss": 1.8628358840942383, "epoch": 3.77, "step": 176999 }, { "epoch": 3.77, "learning_rate": 6.593367346938775e-05, "loss": 2.0362, "step": 177000 }, { "FLOPS loss": 0.0899147242307663, "L0_d": 759.33, "MLM loss": 1.9406108856201172, "epoch": 3.78, "step": 177499 }, { "epoch": 3.78, "learning_rate": 6.583183673469389e-05, "loss": 2.0333, "step": 177500 }, { "FLOPS loss": 0.07696028053760529, "L0_d": 644.02, "MLM loss": 2.1251962184906006, "epoch": 3.79, "step": 177999 }, { "epoch": 3.79, "learning_rate": 6.572979591836735e-05, "loss": 2.0329, "step": 178000 }, { "FLOPS loss": 0.07335279136896133, "L0_d": 671.97, "MLM loss": 1.8094111680984497, "epoch": 3.8, "step": 178499 }, { "epoch": 3.8, "learning_rate": 6.562775510204082e-05, "loss": 2.0354, "step": 178500 }, { "FLOPS loss": 0.05993517488241196, "L0_d": 629.39, "MLM loss": 1.9419050216674805, "epoch": 3.81, "step": 178999 }, { "epoch": 3.81, "learning_rate": 6.55257142857143e-05, "loss": 2.0345, "step": 179000 }, { "FLOPS loss": 0.08473289757966995, "L0_d": 770.55, "MLM loss": 1.9759211540222168, "epoch": 3.82, "step": 179499 }, { "epoch": 3.82, "learning_rate": 6.54238775510204e-05, "loss": 2.0384, "step": 179500 }, { "FLOPS loss": 0.07711312174797058, "L0_d": 760.28, "MLM loss": 1.8868836164474487, "epoch": 3.83, "step": 179999 }, { "epoch": 3.83, "learning_rate": 6.532183673469388e-05, "loss": 2.0349, "step": 180000 }, { "FLOPS loss": 0.0859721377491951, "L0_d": 608.56, "MLM loss": 1.9114336967468262, "epoch": 3.84, "step": 180499 }, { "epoch": 3.84, "learning_rate": 6.521979591836736e-05, "loss": 2.0295, "step": 180500 }, { "FLOPS loss": 0.08973046392202377, "L0_d": 883.16, "MLM loss": 1.7612589597702026, "epoch": 3.85, "step": 180999 }, { "epoch": 3.85, "learning_rate": 6.511775510204082e-05, "loss": 2.0286, "step": 181000 }, { "FLOPS loss": 0.06494830548763275, "L0_d": 711.47, "MLM loss": 1.9608688354492188, "epoch": 3.87, "step": 181499 }, { "epoch": 3.87, "learning_rate": 6.501571428571429e-05, "loss": 2.0352, "step": 181500 }, { "FLOPS loss": 0.08908434212207794, "L0_d": 922.59, "MLM loss": 1.9575316905975342, "epoch": 3.88, "step": 181999 }, { "epoch": 3.88, "learning_rate": 6.491387755102041e-05, "loss": 2.0276, "step": 182000 }, { "FLOPS loss": 0.07852374762296677, "L0_d": 824.45, "MLM loss": 2.036099433898926, "epoch": 3.89, "step": 182499 }, { "epoch": 3.89, "learning_rate": 6.481183673469387e-05, "loss": 2.0341, "step": 182500 }, { "FLOPS loss": 0.06813400238752365, "L0_d": 739.58, "MLM loss": 1.91340970993042, "epoch": 3.9, "step": 182999 }, { "epoch": 3.9, "learning_rate": 6.470979591836735e-05, "loss": 2.0341, "step": 183000 }, { "FLOPS loss": 0.08716423064470291, "L0_d": 1196.8, "MLM loss": 1.9538612365722656, "epoch": 3.91, "step": 183499 }, { "epoch": 3.91, "learning_rate": 6.460775510204082e-05, "loss": 2.0333, "step": 183500 }, { "FLOPS loss": 0.07709267735481262, "L0_d": 814.75, "MLM loss": 1.9605598449707031, "epoch": 3.92, "step": 183999 }, { "epoch": 3.92, "learning_rate": 6.45057142857143e-05, "loss": 2.0297, "step": 184000 }, { "FLOPS loss": 0.08532577753067017, "L0_d": 1042.2, "MLM loss": 1.9132826328277588, "epoch": 3.93, "step": 184499 }, { "epoch": 3.93, "learning_rate": 6.440367346938776e-05, "loss": 2.028, "step": 184500 }, { "FLOPS loss": 0.08234952390193939, "L0_d": 884.34, "MLM loss": 1.7370164394378662, "epoch": 3.94, "step": 184999 }, { "epoch": 3.94, "learning_rate": 6.430183673469388e-05, "loss": 2.0298, "step": 185000 }, { "FLOPS loss": 0.058814145624637604, "L0_d": 477.72, "MLM loss": 1.8557758331298828, "epoch": 3.95, "step": 185499 }, { "epoch": 3.95, "learning_rate": 6.419979591836735e-05, "loss": 2.0295, "step": 185500 }, { "FLOPS loss": 0.06794530153274536, "L0_d": 697.06, "MLM loss": 1.875330924987793, "epoch": 3.96, "step": 185999 }, { "epoch": 3.96, "learning_rate": 6.409775510204082e-05, "loss": 2.0248, "step": 186000 }, { "FLOPS loss": 0.07963329553604126, "L0_d": 585.33, "MLM loss": 1.9809973239898682, "epoch": 3.97, "step": 186499 }, { "epoch": 3.97, "learning_rate": 6.399571428571429e-05, "loss": 2.0245, "step": 186500 }, { "FLOPS loss": 0.08781524002552032, "L0_d": 1159.09, "MLM loss": 2.064220905303955, "epoch": 3.98, "step": 186999 }, { "epoch": 3.98, "learning_rate": 6.389387755102041e-05, "loss": 2.024, "step": 187000 }, { "FLOPS loss": 0.07237225025892258, "L0_d": 772.23, "MLM loss": 2.056314468383789, "epoch": 3.99, "step": 187499 }, { "epoch": 3.99, "learning_rate": 6.379183673469387e-05, "loss": 2.0272, "step": 187500 }, { "FLOPS loss": 0.07651927322149277, "L0_d": 869.56, "MLM loss": 1.8988327980041504, "epoch": 4.0, "step": 187999 }, { "epoch": 4.0, "learning_rate": 6.369000000000001e-05, "loss": 2.0273, "step": 188000 }, { "FLOPS loss": 0.06517531722784042, "L0_d": 584.02, "MLM loss": 2.0192251205444336, "epoch": 4.01, "step": 188499 }, { "epoch": 4.01, "learning_rate": 6.358795918367347e-05, "loss": 2.0263, "step": 188500 }, { "FLOPS loss": 0.0843081995844841, "L0_d": 947.58, "MLM loss": 1.9554133415222168, "epoch": 4.02, "step": 188999 }, { "epoch": 4.02, "learning_rate": 6.348591836734694e-05, "loss": 2.0245, "step": 189000 }, { "FLOPS loss": 0.0910363495349884, "L0_d": 998.47, "MLM loss": 1.9304252862930298, "epoch": 4.04, "step": 189499 }, { "epoch": 4.04, "learning_rate": 6.338387755102042e-05, "loss": 2.0206, "step": 189500 }, { "FLOPS loss": 0.08949656784534454, "L0_d": 986.08, "MLM loss": 1.988331913948059, "epoch": 4.05, "step": 189999 }, { "epoch": 4.05, "learning_rate": 6.328183673469388e-05, "loss": 2.0216, "step": 190000 }, { "FLOPS loss": 0.07536551356315613, "L0_d": 1129.55, "MLM loss": 1.943612813949585, "epoch": 4.06, "step": 190499 }, { "epoch": 4.06, "learning_rate": 6.317979591836735e-05, "loss": 2.0232, "step": 190500 }, { "FLOPS loss": 0.10584509372711182, "L0_d": 1000.84, "MLM loss": 1.8180073499679565, "epoch": 4.07, "step": 190999 }, { "epoch": 4.07, "learning_rate": 6.307775510204081e-05, "loss": 2.0192, "step": 191000 }, { "FLOPS loss": 0.09958112239837646, "L0_d": 976.05, "MLM loss": 1.8265557289123535, "epoch": 4.08, "step": 191499 }, { "epoch": 4.08, "learning_rate": 6.297571428571429e-05, "loss": 2.0236, "step": 191500 }, { "FLOPS loss": 0.06521303206682205, "L0_d": 791.47, "MLM loss": 2.189824104309082, "epoch": 4.09, "step": 191999 }, { "epoch": 4.09, "learning_rate": 6.287367346938777e-05, "loss": 2.0216, "step": 192000 }, { "FLOPS loss": 0.07057648152112961, "L0_d": 820.94, "MLM loss": 1.8760672807693481, "epoch": 4.1, "step": 192499 }, { "epoch": 4.1, "learning_rate": 6.277163265306123e-05, "loss": 2.0241, "step": 192500 }, { "FLOPS loss": 0.09540248662233353, "L0_d": 1057.05, "MLM loss": 1.9012943506240845, "epoch": 4.11, "step": 192999 }, { "epoch": 4.11, "learning_rate": 6.26695918367347e-05, "loss": 2.018, "step": 193000 }, { "FLOPS loss": 0.05730264261364937, "L0_d": 657.75, "MLM loss": 1.9492607116699219, "epoch": 4.12, "step": 193499 }, { "epoch": 4.12, "learning_rate": 6.256775510204082e-05, "loss": 2.0174, "step": 193500 }, { "FLOPS loss": 0.0790947675704956, "L0_d": 768.97, "MLM loss": 1.8801112174987793, "epoch": 4.13, "step": 193999 }, { "epoch": 4.13, "learning_rate": 6.24657142857143e-05, "loss": 2.0205, "step": 194000 }, { "FLOPS loss": 0.05512884259223938, "L0_d": 577.09, "MLM loss": 1.9257984161376953, "epoch": 4.14, "step": 194499 }, { "epoch": 4.14, "learning_rate": 6.236367346938776e-05, "loss": 2.0185, "step": 194500 }, { "FLOPS loss": 0.08608260005712509, "L0_d": 944.81, "MLM loss": 2.005255699157715, "epoch": 4.15, "step": 194999 }, { "epoch": 4.15, "learning_rate": 6.226163265306122e-05, "loss": 2.0164, "step": 195000 }, { "FLOPS loss": 0.07035192102193832, "L0_d": 993.94, "MLM loss": 1.9241690635681152, "epoch": 4.16, "step": 195499 }, { "epoch": 4.16, "learning_rate": 6.215979591836735e-05, "loss": 2.019, "step": 195500 }, { "FLOPS loss": 0.06550294160842896, "L0_d": 719.12, "MLM loss": 1.9413261413574219, "epoch": 4.17, "step": 195999 }, { "epoch": 4.17, "learning_rate": 6.205775510204081e-05, "loss": 2.0138, "step": 196000 }, { "FLOPS loss": 0.06137542426586151, "L0_d": 583.73, "MLM loss": 1.9586896896362305, "epoch": 4.18, "step": 196499 }, { "epoch": 4.18, "learning_rate": 6.195571428571429e-05, "loss": 2.0185, "step": 196500 }, { "FLOPS loss": 0.06665011495351791, "L0_d": 587.61, "MLM loss": 1.8961763381958008, "epoch": 4.2, "step": 196999 }, { "epoch": 4.2, "learning_rate": 6.185367346938777e-05, "loss": 2.0168, "step": 197000 }, { "FLOPS loss": 0.07283007353544235, "L0_d": 620.45, "MLM loss": 1.9708335399627686, "epoch": 4.21, "step": 197499 }, { "epoch": 4.21, "learning_rate": 6.175163265306123e-05, "loss": 2.0135, "step": 197500 }, { "FLOPS loss": 0.07253549993038177, "L0_d": 927.98, "MLM loss": 1.9713668823242188, "epoch": 4.22, "step": 197999 }, { "epoch": 4.22, "learning_rate": 6.164979591836735e-05, "loss": 2.0137, "step": 198000 }, { "FLOPS loss": 0.06769657135009766, "L0_d": 544.48, "MLM loss": 1.9186091423034668, "epoch": 4.23, "step": 198499 }, { "epoch": 4.23, "learning_rate": 6.154775510204082e-05, "loss": 2.0174, "step": 198500 }, { "FLOPS loss": 0.08714200556278229, "L0_d": 905.88, "MLM loss": 1.8871827125549316, "epoch": 4.24, "step": 198999 }, { "epoch": 4.24, "learning_rate": 6.144571428571428e-05, "loss": 2.015, "step": 199000 }, { "FLOPS loss": 0.07281828671693802, "L0_d": 549.28, "MLM loss": 1.9224956035614014, "epoch": 4.25, "step": 199499 }, { "epoch": 4.25, "learning_rate": 6.134367346938776e-05, "loss": 2.0136, "step": 199500 }, { "FLOPS loss": 0.06316013634204865, "L0_d": 645.33, "MLM loss": 1.990523338317871, "epoch": 4.26, "step": 199999 }, { "epoch": 4.26, "learning_rate": 6.124183673469388e-05, "loss": 2.018, "step": 200000 }, { "FLOPS loss": 0.09500326961278915, "L0_d": 838.77, "MLM loss": 1.8790405988693237, "epoch": 4.27, "step": 200499 }, { "epoch": 4.27, "learning_rate": 6.113979591836734e-05, "loss": 2.0115, "step": 200500 }, { "FLOPS loss": 0.07647743076086044, "L0_d": 746.02, "MLM loss": 2.0634748935699463, "epoch": 4.28, "step": 200999 }, { "epoch": 4.28, "learning_rate": 6.103775510204082e-05, "loss": 2.0179, "step": 201000 }, { "FLOPS loss": 0.06590724736452103, "L0_d": 750.33, "MLM loss": 2.047532081604004, "epoch": 4.29, "step": 201499 }, { "epoch": 4.29, "learning_rate": 6.093571428571429e-05, "loss": 2.0102, "step": 201500 }, { "FLOPS loss": 0.0764516219496727, "L0_d": 672.33, "MLM loss": 1.8584916591644287, "epoch": 4.3, "step": 201999 }, { "epoch": 4.3, "learning_rate": 6.083367346938775e-05, "loss": 2.0127, "step": 202000 }, { "FLOPS loss": 0.09220387041568756, "L0_d": 1029.03, "MLM loss": 1.7153816223144531, "epoch": 4.31, "step": 202499 }, { "epoch": 4.31, "learning_rate": 6.073183673469388e-05, "loss": 2.0114, "step": 202500 }, { "FLOPS loss": 0.07743317633867264, "L0_d": 651.98, "MLM loss": 2.0165162086486816, "epoch": 4.32, "step": 202999 }, { "epoch": 4.32, "learning_rate": 6.062979591836735e-05, "loss": 2.0095, "step": 203000 }, { "FLOPS loss": 0.05509522557258606, "L0_d": 613.44, "MLM loss": 2.139920949935913, "epoch": 4.33, "step": 203499 }, { "epoch": 4.33, "learning_rate": 6.0527755102040816e-05, "loss": 2.011, "step": 203500 }, { "FLOPS loss": 0.07268443703651428, "L0_d": 747.22, "MLM loss": 1.8006410598754883, "epoch": 4.34, "step": 203999 }, { "epoch": 4.34, "learning_rate": 6.042571428571429e-05, "loss": 2.0097, "step": 204000 }, { "FLOPS loss": 0.08287376165390015, "L0_d": 773.5, "MLM loss": 1.8289992809295654, "epoch": 4.35, "step": 204499 }, { "epoch": 4.35, "learning_rate": 6.0323877551020415e-05, "loss": 2.0089, "step": 204500 }, { "FLOPS loss": 0.0774405300617218, "L0_d": 866.12, "MLM loss": 1.817805528640747, "epoch": 4.37, "step": 204999 }, { "epoch": 4.37, "learning_rate": 6.022183673469388e-05, "loss": 2.0131, "step": 205000 }, { "FLOPS loss": 0.07295011729001999, "L0_d": 856.09, "MLM loss": 1.9340803623199463, "epoch": 4.38, "step": 205499 }, { "epoch": 4.38, "learning_rate": 6.011979591836735e-05, "loss": 2.01, "step": 205500 }, { "FLOPS loss": 0.06522063910961151, "L0_d": 941.17, "MLM loss": 1.7702338695526123, "epoch": 4.39, "step": 205999 }, { "epoch": 4.39, "learning_rate": 6.0017755102040816e-05, "loss": 2.0094, "step": 206000 }, { "FLOPS loss": 0.0726202130317688, "L0_d": 824.72, "MLM loss": 1.8889031410217285, "epoch": 4.4, "step": 206499 }, { "epoch": 4.4, "learning_rate": 5.991571428571429e-05, "loss": 2.0043, "step": 206500 }, { "FLOPS loss": 0.08556578308343887, "L0_d": 686.48, "MLM loss": 1.9422351121902466, "epoch": 4.41, "step": 206999 }, { "epoch": 4.41, "learning_rate": 5.9813877551020415e-05, "loss": 2.0111, "step": 207000 }, { "FLOPS loss": 0.0572795532643795, "L0_d": 781.86, "MLM loss": 2.0047783851623535, "epoch": 4.42, "step": 207499 }, { "epoch": 4.42, "learning_rate": 5.971183673469388e-05, "loss": 2.0075, "step": 207500 }, { "FLOPS loss": 0.07259980589151382, "L0_d": 992.34, "MLM loss": 1.8562628030776978, "epoch": 4.43, "step": 207999 }, { "epoch": 4.43, "learning_rate": 5.960979591836735e-05, "loss": 2.0033, "step": 208000 }, { "FLOPS loss": 0.08666159957647324, "L0_d": 759.8, "MLM loss": 1.9893381595611572, "epoch": 4.44, "step": 208499 }, { "epoch": 4.44, "learning_rate": 5.9507755102040816e-05, "loss": 2.0072, "step": 208500 }, { "FLOPS loss": 0.07965318113565445, "L0_d": 915.02, "MLM loss": 2.064954996109009, "epoch": 4.45, "step": 208999 }, { "epoch": 4.45, "learning_rate": 5.940591836734695e-05, "loss": 2.0075, "step": 209000 }, { "FLOPS loss": 0.06595531851053238, "L0_d": 577.44, "MLM loss": 2.1270761489868164, "epoch": 4.46, "step": 209499 }, { "epoch": 4.46, "learning_rate": 5.9303877551020415e-05, "loss": 1.9995, "step": 209500 }, { "FLOPS loss": 0.08501884341239929, "L0_d": 678.55, "MLM loss": 1.8834452629089355, "epoch": 4.47, "step": 209999 }, { "epoch": 4.47, "learning_rate": 5.9201836734693886e-05, "loss": 2.0079, "step": 210000 }, { "FLOPS loss": 0.07586806267499924, "L0_d": 865.56, "MLM loss": 1.942221760749817, "epoch": 4.48, "step": 210499 }, { "epoch": 4.48, "learning_rate": 5.909979591836735e-05, "loss": 2.0091, "step": 210500 }, { "FLOPS loss": 0.07614757120609283, "L0_d": 753.77, "MLM loss": 1.9882011413574219, "epoch": 4.49, "step": 210999 }, { "epoch": 4.49, "learning_rate": 5.8997959183673465e-05, "loss": 2.0061, "step": 211000 }, { "FLOPS loss": 0.07684236764907837, "L0_d": 642.22, "MLM loss": 1.8372766971588135, "epoch": 4.5, "step": 211499 }, { "epoch": 4.5, "learning_rate": 5.8895918367346936e-05, "loss": 2.0079, "step": 211500 }, { "FLOPS loss": 0.08997940272092819, "L0_d": 783.89, "MLM loss": 1.818095088005066, "epoch": 4.51, "step": 211999 }, { "epoch": 4.51, "learning_rate": 5.8793877551020414e-05, "loss": 2.0061, "step": 212000 }, { "FLOPS loss": 0.08491813391447067, "L0_d": 1031.25, "MLM loss": 1.9548670053482056, "epoch": 4.53, "step": 212499 }, { "epoch": 4.53, "learning_rate": 5.8691836734693886e-05, "loss": 2.0034, "step": 212500 }, { "FLOPS loss": 0.09706159681081772, "L0_d": 786.84, "MLM loss": 1.8384318351745605, "epoch": 4.54, "step": 212999 }, { "epoch": 4.54, "learning_rate": 5.859e-05, "loss": 1.9993, "step": 213000 }, { "FLOPS loss": 0.06909748166799545, "L0_d": 762.97, "MLM loss": 1.8404794931411743, "epoch": 4.55, "step": 213499 }, { "epoch": 4.55, "learning_rate": 5.848795918367347e-05, "loss": 2.0042, "step": 213500 }, { "FLOPS loss": 0.06857288628816605, "L0_d": 525.5, "MLM loss": 1.8500382900238037, "epoch": 4.56, "step": 213999 }, { "epoch": 4.56, "learning_rate": 5.8385918367346936e-05, "loss": 2.0063, "step": 214000 }, { "FLOPS loss": 0.07077398151159286, "L0_d": 715.25, "MLM loss": 1.8898844718933105, "epoch": 4.57, "step": 214499 }, { "epoch": 4.57, "learning_rate": 5.8283877551020414e-05, "loss": 2.0038, "step": 214500 }, { "FLOPS loss": 0.08178684860467911, "L0_d": 642.12, "MLM loss": 1.9428343772888184, "epoch": 4.58, "step": 214999 }, { "epoch": 4.58, "learning_rate": 5.8181836734693886e-05, "loss": 2.0035, "step": 215000 }, { "FLOPS loss": 0.07514248043298721, "L0_d": 566.45, "MLM loss": 1.8526628017425537, "epoch": 4.59, "step": 215499 }, { "epoch": 4.59, "learning_rate": 5.808e-05, "loss": 2.0024, "step": 215500 }, { "FLOPS loss": 0.06888828426599503, "L0_d": 753.84, "MLM loss": 1.8139371871948242, "epoch": 4.6, "step": 215999 }, { "epoch": 4.6, "learning_rate": 5.797795918367347e-05, "loss": 2.001, "step": 216000 }, { "FLOPS loss": 0.07311675697565079, "L0_d": 1075.84, "MLM loss": 1.8174786567687988, "epoch": 4.61, "step": 216499 }, { "epoch": 4.61, "learning_rate": 5.7875918367346936e-05, "loss": 1.9985, "step": 216500 }, { "FLOPS loss": 0.0811404287815094, "L0_d": 794.91, "MLM loss": 1.907740831375122, "epoch": 4.62, "step": 216999 }, { "epoch": 4.62, "learning_rate": 5.777387755102041e-05, "loss": 1.9996, "step": 217000 }, { "FLOPS loss": 0.07732479274272919, "L0_d": 882.95, "MLM loss": 1.8914384841918945, "epoch": 4.63, "step": 217499 }, { "epoch": 4.63, "learning_rate": 5.7671836734693885e-05, "loss": 2.0016, "step": 217500 }, { "FLOPS loss": 0.08881332725286484, "L0_d": 730.89, "MLM loss": 1.7773964405059814, "epoch": 4.64, "step": 217999 }, { "epoch": 4.64, "learning_rate": 5.757e-05, "loss": 2.0026, "step": 218000 }, { "FLOPS loss": 0.08179426938295364, "L0_d": 1269.34, "MLM loss": 1.8794455528259277, "epoch": 4.65, "step": 218499 }, { "epoch": 4.65, "learning_rate": 5.746795918367347e-05, "loss": 2.0016, "step": 218500 }, { "FLOPS loss": 0.07119172066450119, "L0_d": 803.27, "MLM loss": 1.8622592687606812, "epoch": 4.66, "step": 218999 }, { "epoch": 4.66, "learning_rate": 5.7365918367346936e-05, "loss": 1.9979, "step": 219000 }, { "FLOPS loss": 0.06549478322267532, "L0_d": 638.23, "MLM loss": 1.930425763130188, "epoch": 4.67, "step": 219499 }, { "epoch": 4.67, "learning_rate": 5.726387755102041e-05, "loss": 1.9995, "step": 219500 }, { "FLOPS loss": 0.07244117558002472, "L0_d": 1028.78, "MLM loss": 1.8141852617263794, "epoch": 4.69, "step": 219999 }, { "epoch": 4.69, "learning_rate": 5.7161836734693885e-05, "loss": 1.9981, "step": 220000 }, { "FLOPS loss": 0.07125560194253922, "L0_d": 1015.98, "MLM loss": 1.9103002548217773, "epoch": 4.7, "step": 220499 }, { "epoch": 4.7, "learning_rate": 5.706e-05, "loss": 1.9935, "step": 220500 }, { "FLOPS loss": 0.10560493171215057, "L0_d": 1126.92, "MLM loss": 1.9834262132644653, "epoch": 4.71, "step": 220999 }, { "epoch": 4.71, "learning_rate": 5.695795918367347e-05, "loss": 1.994, "step": 221000 }, { "FLOPS loss": 0.06856855005025864, "L0_d": 628.53, "MLM loss": 2.0253748893737793, "epoch": 4.72, "step": 221499 }, { "epoch": 4.72, "learning_rate": 5.6855918367346935e-05, "loss": 1.9986, "step": 221500 }, { "FLOPS loss": 0.09187950193881989, "L0_d": 1048.83, "MLM loss": 1.8531248569488525, "epoch": 4.73, "step": 221999 }, { "epoch": 4.73, "learning_rate": 5.675387755102041e-05, "loss": 1.9964, "step": 222000 }, { "FLOPS loss": 0.07964413613080978, "L0_d": 870.2, "MLM loss": 1.8161282539367676, "epoch": 4.74, "step": 222499 }, { "epoch": 4.74, "learning_rate": 5.665183673469388e-05, "loss": 1.997, "step": 222500 }, { "FLOPS loss": 0.06449469178915024, "L0_d": 703.88, "MLM loss": 1.9795019626617432, "epoch": 4.75, "step": 222999 }, { "epoch": 4.75, "learning_rate": 5.6549795918367357e-05, "loss": 1.9936, "step": 223000 }, { "FLOPS loss": 0.10031864047050476, "L0_d": 774.0, "MLM loss": 1.8794991970062256, "epoch": 4.76, "step": 223499 }, { "epoch": 4.76, "learning_rate": 5.644795918367347e-05, "loss": 1.9922, "step": 223500 }, { "FLOPS loss": 0.07564357668161392, "L0_d": 614.22, "MLM loss": 2.004389524459839, "epoch": 4.77, "step": 223999 }, { "epoch": 4.77, "learning_rate": 5.634591836734694e-05, "loss": 1.9937, "step": 224000 }, { "FLOPS loss": 0.07761155813932419, "L0_d": 952.73, "MLM loss": 1.999483346939087, "epoch": 4.78, "step": 224499 }, { "epoch": 4.78, "learning_rate": 5.624387755102041e-05, "loss": 2.0023, "step": 224500 }, { "FLOPS loss": 0.09304012358188629, "L0_d": 772.3, "MLM loss": 1.9601857662200928, "epoch": 4.79, "step": 224999 }, { "epoch": 4.79, "learning_rate": 5.614183673469388e-05, "loss": 1.9928, "step": 225000 }, { "FLOPS loss": 0.08472666889429092, "L0_d": 944.25, "MLM loss": 1.9953516721725464, "epoch": 4.8, "step": 225499 }, { "epoch": 4.8, "learning_rate": 5.6039795918367356e-05, "loss": 1.9958, "step": 225500 }, { "FLOPS loss": 0.0648883655667305, "L0_d": 591.64, "MLM loss": 1.7785028219223022, "epoch": 4.81, "step": 225999 }, { "epoch": 4.81, "learning_rate": 5.593795918367347e-05, "loss": 1.9923, "step": 226000 }, { "FLOPS loss": 0.06218310445547104, "L0_d": 643.28, "MLM loss": 1.9573731422424316, "epoch": 4.82, "step": 226499 }, { "epoch": 4.82, "learning_rate": 5.583591836734694e-05, "loss": 1.9919, "step": 226500 }, { "FLOPS loss": 0.09281591325998306, "L0_d": 1149.56, "MLM loss": 1.9270288944244385, "epoch": 4.83, "step": 226999 }, { "epoch": 4.83, "learning_rate": 5.5733877551020406e-05, "loss": 1.9941, "step": 227000 }, { "FLOPS loss": 0.07068472355604172, "L0_d": 792.38, "MLM loss": 1.9185221195220947, "epoch": 4.84, "step": 227499 }, { "epoch": 4.84, "learning_rate": 5.563183673469388e-05, "loss": 1.9916, "step": 227500 }, { "FLOPS loss": 0.07062963396310806, "L0_d": 712.59, "MLM loss": 1.8447917699813843, "epoch": 4.86, "step": 227999 }, { "epoch": 4.86, "learning_rate": 5.552979591836734e-05, "loss": 1.9943, "step": 228000 }, { "FLOPS loss": 0.080656997859478, "L0_d": 760.47, "MLM loss": 1.926360845565796, "epoch": 4.87, "step": 228499 }, { "epoch": 4.87, "learning_rate": 5.542795918367347e-05, "loss": 1.9918, "step": 228500 }, { "FLOPS loss": 0.07454971224069595, "L0_d": 764.95, "MLM loss": 1.9017677307128906, "epoch": 4.88, "step": 228999 }, { "epoch": 4.88, "learning_rate": 5.532591836734694e-05, "loss": 1.9924, "step": 229000 }, { "FLOPS loss": 0.07176411151885986, "L0_d": 671.66, "MLM loss": 2.02742075920105, "epoch": 4.89, "step": 229499 }, { "epoch": 4.89, "learning_rate": 5.5223877551020406e-05, "loss": 1.9887, "step": 229500 }, { "FLOPS loss": 0.09042114019393921, "L0_d": 706.11, "MLM loss": 1.874399185180664, "epoch": 4.9, "step": 229999 }, { "epoch": 4.9, "learning_rate": 5.512183673469388e-05, "loss": 1.9916, "step": 230000 }, { "FLOPS loss": 0.0008960551349446177, "L0_d": 31998.22, "MLM loss": 1.7690234184265137, "epoch": 4.91, "step": 230499 }, { "epoch": 4.91, "learning_rate": 5.5020000000000005e-05, "loss": 1.8658, "step": 230500 }, { "FLOPS loss": 0.0015128779923543334, "L0_d": 31847.52, "MLM loss": 1.8071842193603516, "epoch": 4.92, "step": 230999 }, { "epoch": 4.92, "learning_rate": 5.491795918367347e-05, "loss": 1.8681, "step": 231000 }, { "FLOPS loss": 0.002377150347456336, "L0_d": 29792.11, "MLM loss": 1.9174680709838867, "epoch": 4.93, "step": 231499 }, { "epoch": 4.93, "learning_rate": 5.481591836734694e-05, "loss": 1.8658, "step": 231500 }, { "FLOPS loss": 0.002215802203863859, "L0_d": 24194.72, "MLM loss": 1.8142987489700317, "epoch": 4.94, "step": 231999 }, { "epoch": 4.94, "learning_rate": 5.4713877551020406e-05, "loss": 1.8681, "step": 232000 }, { "FLOPS loss": 0.002735304646193981, "L0_d": 20741.98, "MLM loss": 1.9606852531433105, "epoch": 4.95, "step": 232499 }, { "epoch": 4.95, "learning_rate": 5.461204081632654e-05, "loss": 1.8704, "step": 232500 }, { "FLOPS loss": 0.0025493777357041836, "L0_d": 18399.17, "MLM loss": 1.8065788745880127, "epoch": 4.96, "step": 232999 }, { "epoch": 4.96, "learning_rate": 5.4510000000000005e-05, "loss": 1.8718, "step": 233000 }, { "FLOPS loss": 0.0038267311174422503, "L0_d": 18764.34, "MLM loss": 1.9012820720672607, "epoch": 4.97, "step": 233499 }, { "epoch": 4.97, "learning_rate": 5.4407959183673476e-05, "loss": 1.8723, "step": 233500 }, { "FLOPS loss": 0.004111337009817362, "L0_d": 17049.22, "MLM loss": 1.8614271879196167, "epoch": 4.98, "step": 233999 }, { "epoch": 4.98, "learning_rate": 5.430591836734694e-05, "loss": 1.8725, "step": 234000 }, { "FLOPS loss": 0.00438784621655941, "L0_d": 15735.7, "MLM loss": 1.8476033210754395, "epoch": 4.99, "step": 234499 }, { "epoch": 4.99, "learning_rate": 5.420387755102041e-05, "loss": 1.8701, "step": 234500 }, { "FLOPS loss": 0.00378360110335052, "L0_d": 12747.81, "MLM loss": 1.8290207386016846, "epoch": 5.0, "step": 234999 }, { "epoch": 5.0, "learning_rate": 5.410183673469388e-05, "loss": 1.8713, "step": 235000 }, { "FLOPS loss": 0.0036748433485627174, "L0_d": 10985.72, "MLM loss": 1.953842043876648, "epoch": 5.02, "step": 235499 }, { "epoch": 5.02, "learning_rate": 5.399979591836735e-05, "loss": 1.8715, "step": 235500 }, { "FLOPS loss": 0.0055288574658334255, "L0_d": 12111.08, "MLM loss": 1.7434651851654053, "epoch": 5.03, "step": 235999 }, { "epoch": 5.03, "learning_rate": 5.3897755102040813e-05, "loss": 1.8752, "step": 236000 }, { "FLOPS loss": 0.006115641910582781, "L0_d": 11654.31, "MLM loss": 1.8618048429489136, "epoch": 5.04, "step": 236499 }, { "epoch": 5.04, "learning_rate": 5.379591836734694e-05, "loss": 1.8754, "step": 236500 }, { "FLOPS loss": 0.00563366012647748, "L0_d": 10211.41, "MLM loss": 1.9593673944473267, "epoch": 5.05, "step": 236999 }, { "epoch": 5.05, "learning_rate": 5.369387755102041e-05, "loss": 1.8715, "step": 237000 }, { "FLOPS loss": 0.0055515356361866, "L0_d": 9021.38, "MLM loss": 1.8693525791168213, "epoch": 5.06, "step": 237499 }, { "epoch": 5.06, "learning_rate": 5.359183673469388e-05, "loss": 1.8705, "step": 237500 }, { "FLOPS loss": 0.004694546107202768, "L0_d": 7627.42, "MLM loss": 1.9569108486175537, "epoch": 5.07, "step": 237999 }, { "epoch": 5.07, "learning_rate": 5.348979591836735e-05, "loss": 1.8742, "step": 238000 }, { "FLOPS loss": 0.0069424486719071865, "L0_d": 8976.55, "MLM loss": 1.8137693405151367, "epoch": 5.08, "step": 238499 }, { "epoch": 5.08, "learning_rate": 5.3387959183673476e-05, "loss": 1.872, "step": 238500 }, { "FLOPS loss": 0.00838744267821312, "L0_d": 8812.91, "MLM loss": 1.7488086223602295, "epoch": 5.09, "step": 238999 }, { "epoch": 5.09, "learning_rate": 5.328612244897959e-05, "loss": 1.8758, "step": 239000 }, { "FLOPS loss": 0.00693407841026783, "L0_d": 6661.42, "MLM loss": 1.987975835800171, "epoch": 5.1, "step": 239499 }, { "epoch": 5.1, "learning_rate": 5.318408163265306e-05, "loss": 1.8779, "step": 239500 }, { "FLOPS loss": 0.007485151290893555, "L0_d": 6464.69, "MLM loss": 1.8088548183441162, "epoch": 5.11, "step": 239999 }, { "epoch": 5.11, "learning_rate": 5.3082040816326526e-05, "loss": 1.875, "step": 240000 }, { "FLOPS loss": 0.008187003433704376, "L0_d": 5961.67, "MLM loss": 1.8100885152816772, "epoch": 5.12, "step": 240499 }, { "epoch": 5.12, "learning_rate": 5.2980000000000004e-05, "loss": 1.8735, "step": 240500 }, { "FLOPS loss": 0.008060183376073837, "L0_d": 5845.19, "MLM loss": 1.8168284893035889, "epoch": 5.13, "step": 240999 }, { "epoch": 5.13, "learning_rate": 5.2877959183673476e-05, "loss": 1.8771, "step": 241000 }, { "FLOPS loss": 0.010463972575962543, "L0_d": 6205.86, "MLM loss": 1.7326014041900635, "epoch": 5.14, "step": 241499 }, { "epoch": 5.14, "learning_rate": 5.277591836734694e-05, "loss": 1.8801, "step": 241500 }, { "FLOPS loss": 0.011188147589564323, "L0_d": 6291.0, "MLM loss": 1.9021883010864258, "epoch": 5.15, "step": 241999 }, { "epoch": 5.15, "learning_rate": 5.267408163265306e-05, "loss": 1.8799, "step": 242000 }, { "FLOPS loss": 0.013200161047279835, "L0_d": 7156.77, "MLM loss": 1.6133452653884888, "epoch": 5.16, "step": 242499 }, { "epoch": 5.16, "learning_rate": 5.2572040816326526e-05, "loss": 1.8787, "step": 242500 }, { "FLOPS loss": 0.013426079414784908, "L0_d": 5465.06, "MLM loss": 1.911632776260376, "epoch": 5.17, "step": 242999 }, { "epoch": 5.17, "learning_rate": 5.247000000000001e-05, "loss": 1.879, "step": 243000 }, { "FLOPS loss": 0.010374533012509346, "L0_d": 4300.98, "MLM loss": 1.865431547164917, "epoch": 5.19, "step": 243499 }, { "epoch": 5.19, "learning_rate": 5.2367959183673476e-05, "loss": 1.8795, "step": 243500 }, { "FLOPS loss": 0.014977757818996906, "L0_d": 4963.09, "MLM loss": 1.8555488586425781, "epoch": 5.2, "step": 243999 }, { "epoch": 5.2, "learning_rate": 5.226591836734695e-05, "loss": 1.8808, "step": 244000 }, { "FLOPS loss": 0.010343066416680813, "L0_d": 3994.11, "MLM loss": 1.9097082614898682, "epoch": 5.21, "step": 244499 }, { "epoch": 5.21, "learning_rate": 5.216387755102041e-05, "loss": 1.8859, "step": 244500 }, { "FLOPS loss": 0.014617117121815681, "L0_d": 4153.5, "MLM loss": 1.8086098432540894, "epoch": 5.22, "step": 244999 }, { "epoch": 5.22, "learning_rate": 5.206204081632653e-05, "loss": 1.8823, "step": 245000 }, { "FLOPS loss": 0.013393222354352474, "L0_d": 3711.11, "MLM loss": 1.910830020904541, "epoch": 5.23, "step": 245499 }, { "epoch": 5.23, "learning_rate": 5.196e-05, "loss": 1.8811, "step": 245500 }, { "FLOPS loss": 0.014843770302832127, "L0_d": 3604.33, "MLM loss": 1.838127851486206, "epoch": 5.24, "step": 245999 }, { "epoch": 5.24, "learning_rate": 5.1857959183673475e-05, "loss": 1.8852, "step": 246000 }, { "FLOPS loss": 0.019269835203886032, "L0_d": 4254.81, "MLM loss": 1.8526206016540527, "epoch": 5.25, "step": 246499 }, { "epoch": 5.25, "learning_rate": 5.175591836734695e-05, "loss": 1.8886, "step": 246500 }, { "FLOPS loss": 0.01963115483522415, "L0_d": 3850.11, "MLM loss": 2.0099689960479736, "epoch": 5.26, "step": 246999 }, { "epoch": 5.26, "learning_rate": 5.165387755102041e-05, "loss": 1.8854, "step": 247000 }, { "FLOPS loss": 0.016454359516501427, "L0_d": 3115.81, "MLM loss": 1.9626414775848389, "epoch": 5.27, "step": 247499 }, { "epoch": 5.27, "learning_rate": 5.155204081632653e-05, "loss": 1.8857, "step": 247500 }, { "FLOPS loss": 0.020083287730813026, "L0_d": 3520.73, "MLM loss": 1.9162139892578125, "epoch": 5.28, "step": 247999 }, { "epoch": 5.28, "learning_rate": 5.145e-05, "loss": 1.8854, "step": 248000 }, { "FLOPS loss": 0.0178839061409235, "L0_d": 2936.16, "MLM loss": 1.8663880825042725, "epoch": 5.29, "step": 248499 }, { "epoch": 5.29, "learning_rate": 5.134795918367347e-05, "loss": 1.8929, "step": 248500 }, { "FLOPS loss": 0.023697547614574432, "L0_d": 3550.75, "MLM loss": 1.9941072463989258, "epoch": 5.3, "step": 248999 }, { "epoch": 5.3, "learning_rate": 5.124591836734695e-05, "loss": 1.8885, "step": 249000 }, { "FLOPS loss": 0.020585505291819572, "L0_d": 2454.28, "MLM loss": 1.7720873355865479, "epoch": 5.31, "step": 249499 }, { "epoch": 5.31, "learning_rate": 5.114387755102041e-05, "loss": 1.8903, "step": 249500 }, { "FLOPS loss": 0.022391222417354584, "L0_d": 3282.28, "MLM loss": 1.873108148574829, "epoch": 5.32, "step": 249999 }, { "epoch": 5.32, "learning_rate": 5.104204081632653e-05, "loss": 1.8896, "step": 250000 }, { "FLOPS loss": 0.02332943119108677, "L0_d": 2424.98, "MLM loss": 2.0432984828948975, "epoch": 5.33, "step": 250499 }, { "epoch": 5.33, "learning_rate": 5.094e-05, "loss": 1.8887, "step": 250500 }, { "FLOPS loss": 0.02283013053238392, "L0_d": 2545.16, "MLM loss": 1.946418046951294, "epoch": 5.35, "step": 250999 }, { "epoch": 5.35, "learning_rate": 5.083795918367347e-05, "loss": 1.8915, "step": 251000 }, { "FLOPS loss": 0.027263039723038673, "L0_d": 3158.34, "MLM loss": 2.0646772384643555, "epoch": 5.36, "step": 251499 }, { "epoch": 5.36, "learning_rate": 5.0735918367346947e-05, "loss": 1.8907, "step": 251500 }, { "FLOPS loss": 0.026302088052034378, "L0_d": 3060.11, "MLM loss": 1.7769654989242554, "epoch": 5.37, "step": 251999 }, { "epoch": 5.37, "learning_rate": 5.063387755102041e-05, "loss": 1.8918, "step": 252000 }, { "FLOPS loss": 0.026689713820815086, "L0_d": 2597.42, "MLM loss": 2.0476834774017334, "epoch": 5.38, "step": 252499 }, { "epoch": 5.38, "learning_rate": 5.053183673469388e-05, "loss": 1.8961, "step": 252500 }, { "FLOPS loss": 0.029723016545176506, "L0_d": 3018.95, "MLM loss": 1.852935791015625, "epoch": 5.39, "step": 252999 }, { "epoch": 5.39, "learning_rate": 5.042979591836735e-05, "loss": 1.8928, "step": 253000 }, { "FLOPS loss": 0.025388535112142563, "L0_d": 1884.11, "MLM loss": 1.8686188459396362, "epoch": 5.4, "step": 253499 }, { "epoch": 5.4, "learning_rate": 5.032775510204082e-05, "loss": 1.8976, "step": 253500 }, { "FLOPS loss": 0.032775864005088806, "L0_d": 2505.75, "MLM loss": 1.8625152111053467, "epoch": 5.41, "step": 253999 }, { "epoch": 5.41, "learning_rate": 5.0226122448979596e-05, "loss": 1.8995, "step": 254000 }, { "FLOPS loss": 0.030364518985152245, "L0_d": 2600.05, "MLM loss": 1.926785945892334, "epoch": 5.42, "step": 254499 }, { "epoch": 5.42, "learning_rate": 5.012408163265307e-05, "loss": 1.905, "step": 254500 }, { "FLOPS loss": 0.030143748968839645, "L0_d": 2478.72, "MLM loss": 1.982461929321289, "epoch": 5.43, "step": 254999 }, { "epoch": 5.43, "learning_rate": 5.002204081632653e-05, "loss": 1.8977, "step": 255000 }, { "FLOPS loss": 0.03670482337474823, "L0_d": 2263.52, "MLM loss": 1.7775113582611084, "epoch": 5.44, "step": 255499 }, { "epoch": 5.44, "learning_rate": 4.992e-05, "loss": 1.8996, "step": 255500 }, { "FLOPS loss": 0.03422041982412338, "L0_d": 2109.53, "MLM loss": 1.787998914718628, "epoch": 5.45, "step": 255999 }, { "epoch": 5.45, "learning_rate": 4.9817959183673475e-05, "loss": 1.9009, "step": 256000 }, { "FLOPS loss": 0.03274346888065338, "L0_d": 2421.89, "MLM loss": 1.9225132465362549, "epoch": 5.46, "step": 256499 }, { "epoch": 5.46, "learning_rate": 4.971591836734694e-05, "loss": 1.9031, "step": 256500 }, { "FLOPS loss": 0.04315318539738655, "L0_d": 2542.11, "MLM loss": 1.941856026649475, "epoch": 5.47, "step": 256999 }, { "epoch": 5.47, "learning_rate": 4.961387755102041e-05, "loss": 1.9076, "step": 257000 }, { "FLOPS loss": 0.03899309039115906, "L0_d": 1758.94, "MLM loss": 1.8869366645812988, "epoch": 5.48, "step": 257499 }, { "epoch": 5.48, "learning_rate": 4.951204081632653e-05, "loss": 1.9053, "step": 257500 }, { "FLOPS loss": 0.04414968192577362, "L0_d": 2086.28, "MLM loss": 1.936894416809082, "epoch": 5.49, "step": 257999 }, { "epoch": 5.49, "learning_rate": 4.941e-05, "loss": 1.9087, "step": 258000 }, { "FLOPS loss": 0.03893755003809929, "L0_d": 2211.0, "MLM loss": 1.8997561931610107, "epoch": 5.5, "step": 258499 }, { "epoch": 5.5, "learning_rate": 4.9307959183673474e-05, "loss": 1.9008, "step": 258500 }, { "FLOPS loss": 0.039572618901729584, "L0_d": 1918.89, "MLM loss": 1.776296854019165, "epoch": 5.52, "step": 258999 }, { "epoch": 5.52, "learning_rate": 4.9205918367346946e-05, "loss": 1.9048, "step": 259000 }, { "FLOPS loss": 0.04069853574037552, "L0_d": 1673.44, "MLM loss": 1.9040732383728027, "epoch": 5.53, "step": 259499 }, { "epoch": 5.53, "learning_rate": 4.9104081632653067e-05, "loss": 1.9065, "step": 259500 }, { "FLOPS loss": 0.04161301255226135, "L0_d": 1786.67, "MLM loss": 1.7740488052368164, "epoch": 5.54, "step": 259999 }, { "epoch": 5.54, "learning_rate": 4.900204081632653e-05, "loss": 1.9154, "step": 260000 }, { "FLOPS loss": 0.03743300214409828, "L0_d": 1616.17, "MLM loss": 1.9160172939300537, "epoch": 5.55, "step": 260499 }, { "epoch": 5.55, "learning_rate": 4.89e-05, "loss": 1.9111, "step": 260500 }, { "FLOPS loss": 0.04368520900607109, "L0_d": 1466.02, "MLM loss": 2.037893772125244, "epoch": 5.56, "step": 260999 }, { "epoch": 5.56, "learning_rate": 4.879795918367347e-05, "loss": 1.9109, "step": 261000 }, { "FLOPS loss": 0.04595090448856354, "L0_d": 1555.14, "MLM loss": 1.767559289932251, "epoch": 5.57, "step": 261499 }, { "epoch": 5.57, "learning_rate": 4.8695918367346946e-05, "loss": 1.9161, "step": 261500 }, { "FLOPS loss": 0.04404296353459358, "L0_d": 1743.38, "MLM loss": 1.9173357486724854, "epoch": 5.58, "step": 261999 }, { "epoch": 5.58, "learning_rate": 4.859387755102041e-05, "loss": 1.9176, "step": 262000 }, { "FLOPS loss": 0.04245808348059654, "L0_d": 1125.33, "MLM loss": 1.6865381002426147, "epoch": 5.59, "step": 262499 }, { "epoch": 5.59, "learning_rate": 4.849183673469388e-05, "loss": 1.9178, "step": 262500 }, { "FLOPS loss": 0.04385161027312279, "L0_d": 1418.67, "MLM loss": 1.9055112600326538, "epoch": 5.6, "step": 262999 }, { "epoch": 5.6, "learning_rate": 4.8389795918367347e-05, "loss": 1.9131, "step": 263000 }, { "FLOPS loss": 0.053213585168123245, "L0_d": 2183.94, "MLM loss": 1.9083770513534546, "epoch": 5.61, "step": 263499 }, { "epoch": 5.61, "learning_rate": 4.828795918367347e-05, "loss": 1.9159, "step": 263500 }, { "FLOPS loss": 0.05162021145224571, "L0_d": 1506.91, "MLM loss": 1.8558990955352783, "epoch": 5.62, "step": 263999 }, { "epoch": 5.62, "learning_rate": 4.818591836734694e-05, "loss": 1.9171, "step": 264000 }, { "FLOPS loss": 0.05895388498902321, "L0_d": 1319.53, "MLM loss": 1.7792572975158691, "epoch": 5.63, "step": 264499 }, { "epoch": 5.63, "learning_rate": 4.808387755102041e-05, "loss": 1.9188, "step": 264500 }, { "FLOPS loss": 0.06398754566907883, "L0_d": 1367.34, "MLM loss": 1.8091468811035156, "epoch": 5.64, "step": 264999 }, { "epoch": 5.64, "learning_rate": 4.798183673469388e-05, "loss": 1.9174, "step": 265000 }, { "FLOPS loss": 0.05075083673000336, "L0_d": 1379.28, "MLM loss": 1.8456506729125977, "epoch": 5.65, "step": 265499 }, { "epoch": 5.65, "learning_rate": 4.7879795918367346e-05, "loss": 1.9232, "step": 265500 }, { "FLOPS loss": 0.0504881925880909, "L0_d": 1447.92, "MLM loss": 1.9336979389190674, "epoch": 5.66, "step": 265999 }, { "epoch": 5.66, "learning_rate": 4.7777959183673474e-05, "loss": 1.9207, "step": 266000 }, { "FLOPS loss": 0.06466767936944962, "L0_d": 1269.03, "MLM loss": 1.8459978103637695, "epoch": 5.68, "step": 266499 }, { "epoch": 5.68, "learning_rate": 4.767591836734694e-05, "loss": 1.9252, "step": 266500 }, { "FLOPS loss": 0.055939216166734695, "L0_d": 1076.92, "MLM loss": 1.9006898403167725, "epoch": 5.69, "step": 266999 }, { "epoch": 5.69, "learning_rate": 4.757387755102041e-05, "loss": 1.9256, "step": 267000 }, { "FLOPS loss": 0.06076573580503464, "L0_d": 1341.17, "MLM loss": 1.926182508468628, "epoch": 5.7, "step": 267499 }, { "epoch": 5.7, "learning_rate": 4.747183673469388e-05, "loss": 1.9289, "step": 267500 }, { "FLOPS loss": 0.054936155676841736, "L0_d": 1288.95, "MLM loss": 1.973268985748291, "epoch": 5.71, "step": 267999 }, { "epoch": 5.71, "learning_rate": 4.7369795918367346e-05, "loss": 1.9284, "step": 268000 }, { "FLOPS loss": 0.06445150822401047, "L0_d": 1231.92, "MLM loss": 2.0671331882476807, "epoch": 5.72, "step": 268499 }, { "epoch": 5.72, "learning_rate": 4.7267959183673474e-05, "loss": 1.9293, "step": 268500 }, { "FLOPS loss": 0.06608186662197113, "L0_d": 1103.44, "MLM loss": 1.9241859912872314, "epoch": 5.73, "step": 268999 }, { "epoch": 5.73, "learning_rate": 4.716591836734694e-05, "loss": 1.9299, "step": 269000 }, { "FLOPS loss": 0.05536879971623421, "L0_d": 945.16, "MLM loss": 1.7656147480010986, "epoch": 5.74, "step": 269499 }, { "epoch": 5.74, "learning_rate": 4.706387755102041e-05, "loss": 1.9338, "step": 269500 }, { "FLOPS loss": 0.06108957156538963, "L0_d": 1119.91, "MLM loss": 1.729440689086914, "epoch": 5.75, "step": 269999 }, { "epoch": 5.75, "learning_rate": 4.696183673469388e-05, "loss": 1.9344, "step": 270000 }, { "FLOPS loss": 0.05857192724943161, "L0_d": 900.25, "MLM loss": 2.0199506282806396, "epoch": 5.76, "step": 270499 }, { "epoch": 5.76, "learning_rate": 4.686e-05, "loss": 1.9318, "step": 270500 }, { "FLOPS loss": 0.06669124215841293, "L0_d": 1039.64, "MLM loss": 1.8732706308364868, "epoch": 5.77, "step": 270999 }, { "epoch": 5.77, "learning_rate": 4.6757959183673473e-05, "loss": 1.936, "step": 271000 }, { "FLOPS loss": 0.06447934359312057, "L0_d": 1104.81, "MLM loss": 1.7664421796798706, "epoch": 5.78, "step": 271499 }, { "epoch": 5.78, "learning_rate": 4.665591836734694e-05, "loss": 1.9361, "step": 271500 }, { "FLOPS loss": 0.07491941750049591, "L0_d": 1102.53, "MLM loss": 1.9895565509796143, "epoch": 5.79, "step": 271999 }, { "epoch": 5.79, "learning_rate": 4.655387755102041e-05, "loss": 1.9386, "step": 272000 }, { "FLOPS loss": 0.06251773238182068, "L0_d": 894.05, "MLM loss": 1.930220365524292, "epoch": 5.8, "step": 272499 }, { "epoch": 5.8, "learning_rate": 4.645183673469388e-05, "loss": 1.9419, "step": 272500 }, { "FLOPS loss": 0.06176398694515228, "L0_d": 843.98, "MLM loss": 1.6715091466903687, "epoch": 5.81, "step": 272999 }, { "epoch": 5.81, "learning_rate": 4.634979591836735e-05, "loss": 1.9398, "step": 273000 }, { "FLOPS loss": 0.07493959367275238, "L0_d": 864.36, "MLM loss": 2.028404474258423, "epoch": 5.82, "step": 273499 }, { "epoch": 5.82, "learning_rate": 4.624795918367347e-05, "loss": 1.9426, "step": 273500 }, { "FLOPS loss": 0.05773789435625076, "L0_d": 694.45, "MLM loss": 1.8639841079711914, "epoch": 5.83, "step": 273999 }, { "epoch": 5.84, "learning_rate": 4.614591836734694e-05, "loss": 1.944, "step": 274000 }, { "FLOPS loss": 0.0750850960612297, "L0_d": 890.12, "MLM loss": 1.9028539657592773, "epoch": 5.85, "step": 274499 }, { "epoch": 5.85, "learning_rate": 4.604387755102041e-05, "loss": 1.9442, "step": 274500 }, { "FLOPS loss": 0.07717510312795639, "L0_d": 1054.8, "MLM loss": 1.8612613677978516, "epoch": 5.86, "step": 274999 }, { "epoch": 5.86, "learning_rate": 4.5941836734693874e-05, "loss": 1.9462, "step": 275000 }, { "FLOPS loss": 0.07143872231245041, "L0_d": 700.31, "MLM loss": 1.8450987339019775, "epoch": 5.87, "step": 275499 }, { "epoch": 5.87, "learning_rate": 4.583979591836735e-05, "loss": 1.9472, "step": 275500 }, { "FLOPS loss": 0.08762510865926743, "L0_d": 987.81, "MLM loss": 1.8697837591171265, "epoch": 5.88, "step": 275999 }, { "epoch": 5.88, "learning_rate": 4.573795918367347e-05, "loss": 1.9489, "step": 276000 }, { "FLOPS loss": 0.06401695311069489, "L0_d": 813.38, "MLM loss": 1.9566137790679932, "epoch": 5.89, "step": 276499 }, { "epoch": 5.89, "learning_rate": 4.5635918367346945e-05, "loss": 1.9506, "step": 276500 }, { "FLOPS loss": 0.06458479911088943, "L0_d": 561.38, "MLM loss": 1.8860130310058594, "epoch": 5.9, "step": 276999 }, { "epoch": 5.9, "learning_rate": 4.553387755102041e-05, "loss": 1.9504, "step": 277000 }, { "FLOPS loss": 0.06730065494775772, "L0_d": 896.34, "MLM loss": 1.893805980682373, "epoch": 5.91, "step": 277499 }, { "epoch": 5.91, "learning_rate": 4.543183673469388e-05, "loss": 1.954, "step": 277500 }, { "FLOPS loss": 0.08798781037330627, "L0_d": 1083.72, "MLM loss": 1.9584965705871582, "epoch": 5.92, "step": 277999 }, { "epoch": 5.92, "learning_rate": 4.533e-05, "loss": 1.9502, "step": 278000 }, { "FLOPS loss": 0.07679998129606247, "L0_d": 1217.53, "MLM loss": 1.9031168222427368, "epoch": 5.93, "step": 278499 }, { "epoch": 5.93, "learning_rate": 4.522795918367347e-05, "loss": 1.9538, "step": 278500 }, { "FLOPS loss": 0.06088661774992943, "L0_d": 524.78, "MLM loss": 1.8549163341522217, "epoch": 5.94, "step": 278999 }, { "epoch": 5.94, "learning_rate": 4.5125918367346944e-05, "loss": 1.9559, "step": 279000 }, { "FLOPS loss": 0.0638987198472023, "L0_d": 638.92, "MLM loss": 2.013615131378174, "epoch": 5.95, "step": 279499 }, { "epoch": 5.95, "learning_rate": 4.502387755102041e-05, "loss": 1.9549, "step": 279500 }, { "FLOPS loss": 0.10002320259809494, "L0_d": 722.52, "MLM loss": 1.861433506011963, "epoch": 5.96, "step": 279999 }, { "epoch": 5.96, "learning_rate": 4.492183673469388e-05, "loss": 1.9545, "step": 280000 }, { "FLOPS loss": 0.06846465915441513, "L0_d": 619.73, "MLM loss": 2.04079270362854, "epoch": 5.97, "step": 280499 }, { "epoch": 5.97, "learning_rate": 4.482e-05, "loss": 1.9575, "step": 280500 }, { "FLOPS loss": 0.08104480803012848, "L0_d": 706.91, "MLM loss": 1.932988166809082, "epoch": 5.98, "step": 280999 }, { "epoch": 5.98, "learning_rate": 4.4717959183673466e-05, "loss": 1.956, "step": 281000 }, { "FLOPS loss": 0.06805707514286041, "L0_d": 595.69, "MLM loss": 1.7187745571136475, "epoch": 5.99, "step": 281499 }, { "epoch": 5.99, "learning_rate": 4.4615918367346944e-05, "loss": 1.9531, "step": 281500 }, { "FLOPS loss": 0.07271081209182739, "L0_d": 627.72, "MLM loss": 1.9254471063613892, "epoch": 6.01, "step": 281999 }, { "epoch": 6.01, "learning_rate": 4.451387755102041e-05, "loss": 1.9552, "step": 282000 }, { "FLOPS loss": 0.08167015016078949, "L0_d": 696.19, "MLM loss": 2.0464000701904297, "epoch": 6.02, "step": 282499 }, { "epoch": 6.02, "learning_rate": 4.441183673469388e-05, "loss": 1.9541, "step": 282500 }, { "FLOPS loss": 0.08481060713529587, "L0_d": 886.92, "MLM loss": 1.9736199378967285, "epoch": 6.03, "step": 282999 }, { "epoch": 6.03, "learning_rate": 4.431e-05, "loss": 1.9535, "step": 283000 }, { "FLOPS loss": 0.06780994683504105, "L0_d": 677.19, "MLM loss": 1.9477245807647705, "epoch": 6.04, "step": 283499 }, { "epoch": 6.04, "learning_rate": 4.420795918367347e-05, "loss": 1.9543, "step": 283500 }, { "FLOPS loss": 0.07411380112171173, "L0_d": 743.67, "MLM loss": 2.0674924850463867, "epoch": 6.05, "step": 283999 }, { "epoch": 6.05, "learning_rate": 4.4105918367346944e-05, "loss": 1.9532, "step": 284000 }, { "FLOPS loss": 0.07907848060131073, "L0_d": 974.5, "MLM loss": 1.780951738357544, "epoch": 6.06, "step": 284499 }, { "epoch": 6.06, "learning_rate": 4.400387755102041e-05, "loss": 1.9521, "step": 284500 }, { "FLOPS loss": 0.07107169926166534, "L0_d": 681.83, "MLM loss": 1.8572118282318115, "epoch": 6.07, "step": 284999 }, { "epoch": 6.07, "learning_rate": 4.390183673469388e-05, "loss": 1.9522, "step": 285000 }, { "FLOPS loss": 0.06437213718891144, "L0_d": 544.11, "MLM loss": 1.940516471862793, "epoch": 6.08, "step": 285499 }, { "epoch": 6.08, "learning_rate": 4.38e-05, "loss": 1.9512, "step": 285500 }, { "FLOPS loss": 0.08128892630338669, "L0_d": 735.03, "MLM loss": 1.9186279773712158, "epoch": 6.09, "step": 285999 }, { "epoch": 6.09, "learning_rate": 4.369795918367347e-05, "loss": 1.9538, "step": 286000 }, { "FLOPS loss": 0.08808960020542145, "L0_d": 1024.83, "MLM loss": 1.988155722618103, "epoch": 6.1, "step": 286499 }, { "epoch": 6.1, "learning_rate": 4.359591836734694e-05, "loss": 1.9537, "step": 286500 }, { "FLOPS loss": 0.07475665211677551, "L0_d": 571.81, "MLM loss": 1.839475393295288, "epoch": 6.11, "step": 286999 }, { "epoch": 6.11, "learning_rate": 4.3493877551020415e-05, "loss": 1.9508, "step": 287000 }, { "FLOPS loss": 0.06577183306217194, "L0_d": 429.23, "MLM loss": 1.8133153915405273, "epoch": 6.12, "step": 287499 }, { "epoch": 6.12, "learning_rate": 4.3392040816326536e-05, "loss": 1.9493, "step": 287500 }, { "FLOPS loss": 0.07787065207958221, "L0_d": 620.91, "MLM loss": 1.8850531578063965, "epoch": 6.13, "step": 287999 }, { "epoch": 6.13, "learning_rate": 4.329e-05, "loss": 1.9523, "step": 288000 }, { "FLOPS loss": 0.07825678586959839, "L0_d": 962.11, "MLM loss": 1.900402545928955, "epoch": 6.14, "step": 288499 }, { "epoch": 6.14, "learning_rate": 4.318795918367347e-05, "loss": 1.9498, "step": 288500 }, { "FLOPS loss": 0.06425338983535767, "L0_d": 524.34, "MLM loss": 1.85397469997406, "epoch": 6.15, "step": 288999 }, { "epoch": 6.15, "learning_rate": 4.308591836734694e-05, "loss": 1.9492, "step": 289000 }, { "FLOPS loss": 0.08077821135520935, "L0_d": 669.0, "MLM loss": 2.0354411602020264, "epoch": 6.17, "step": 289499 }, { "epoch": 6.17, "learning_rate": 4.2983877551020415e-05, "loss": 1.9522, "step": 289500 }, { "FLOPS loss": 0.07401479035615921, "L0_d": 786.83, "MLM loss": 1.9391406774520874, "epoch": 6.18, "step": 289999 }, { "epoch": 6.18, "learning_rate": 4.2882040816326536e-05, "loss": 1.9523, "step": 290000 }, { "FLOPS loss": 0.06442100554704666, "L0_d": 726.97, "MLM loss": 1.934419870376587, "epoch": 6.19, "step": 290499 }, { "epoch": 6.19, "learning_rate": 4.278e-05, "loss": 1.947, "step": 290500 }, { "FLOPS loss": 0.07297225296497345, "L0_d": 774.31, "MLM loss": 1.890745997428894, "epoch": 6.2, "step": 290999 }, { "epoch": 6.2, "learning_rate": 4.267795918367347e-05, "loss": 1.9549, "step": 291000 }, { "FLOPS loss": 0.07912027090787888, "L0_d": 898.61, "MLM loss": 2.038191318511963, "epoch": 6.21, "step": 291499 }, { "epoch": 6.21, "learning_rate": 4.2575918367346937e-05, "loss": 1.9493, "step": 291500 }, { "FLOPS loss": 0.0650862529873848, "L0_d": 768.06, "MLM loss": 1.9675188064575195, "epoch": 6.22, "step": 291999 }, { "epoch": 6.22, "learning_rate": 4.2474081632653064e-05, "loss": 1.9483, "step": 292000 }, { "FLOPS loss": 0.07283622026443481, "L0_d": 700.41, "MLM loss": 1.8989489078521729, "epoch": 6.23, "step": 292499 }, { "epoch": 6.23, "learning_rate": 4.237204081632653e-05, "loss": 1.9513, "step": 292500 }, { "FLOPS loss": 0.06748488545417786, "L0_d": 741.77, "MLM loss": 1.887213945388794, "epoch": 6.24, "step": 292999 }, { "epoch": 6.24, "learning_rate": 4.227000000000001e-05, "loss": 1.9479, "step": 293000 }, { "FLOPS loss": 0.07973089814186096, "L0_d": 664.5, "MLM loss": 2.0132598876953125, "epoch": 6.25, "step": 293499 }, { "epoch": 6.25, "learning_rate": 4.216795918367347e-05, "loss": 1.9496, "step": 293500 }, { "FLOPS loss": 0.07241816818714142, "L0_d": 707.08, "MLM loss": 1.7604951858520508, "epoch": 6.26, "step": 293999 }, { "epoch": 6.26, "learning_rate": 4.206612244897959e-05, "loss": 1.9465, "step": 294000 }, { "FLOPS loss": 0.07850213348865509, "L0_d": 698.77, "MLM loss": 1.9729360342025757, "epoch": 6.27, "step": 294499 }, { "epoch": 6.27, "learning_rate": 4.1964081632653064e-05, "loss": 1.9481, "step": 294500 }, { "FLOPS loss": 0.08519662916660309, "L0_d": 785.38, "MLM loss": 1.8330121040344238, "epoch": 6.28, "step": 294999 }, { "epoch": 6.28, "learning_rate": 4.186204081632653e-05, "loss": 1.9431, "step": 295000 }, { "FLOPS loss": 0.05845937877893448, "L0_d": 621.36, "MLM loss": 1.9399977922439575, "epoch": 6.29, "step": 295499 }, { "epoch": 6.29, "learning_rate": 4.176000000000001e-05, "loss": 1.947, "step": 295500 }, { "FLOPS loss": 0.06915000826120377, "L0_d": 650.09, "MLM loss": 1.8091906309127808, "epoch": 6.3, "step": 295999 }, { "epoch": 6.3, "learning_rate": 4.165816326530613e-05, "loss": 1.9482, "step": 296000 }, { "FLOPS loss": 0.07740022242069244, "L0_d": 687.33, "MLM loss": 1.9560225009918213, "epoch": 6.31, "step": 296499 }, { "epoch": 6.31, "learning_rate": 4.155612244897959e-05, "loss": 1.9488, "step": 296500 }, { "FLOPS loss": 0.08005591481924057, "L0_d": 899.48, "MLM loss": 1.7681009769439697, "epoch": 6.32, "step": 296999 }, { "epoch": 6.32, "learning_rate": 4.1454081632653064e-05, "loss": 1.9483, "step": 297000 }, { "FLOPS loss": 0.06122187525033951, "L0_d": 580.2, "MLM loss": 1.9148626327514648, "epoch": 6.34, "step": 297499 }, { "epoch": 6.34, "learning_rate": 4.135204081632653e-05, "loss": 1.9475, "step": 297500 }, { "FLOPS loss": 0.07536512613296509, "L0_d": 643.31, "MLM loss": 1.77424156665802, "epoch": 6.35, "step": 297999 }, { "epoch": 6.35, "learning_rate": 4.125e-05, "loss": 1.9466, "step": 298000 }, { "FLOPS loss": 0.0731494352221489, "L0_d": 579.67, "MLM loss": 1.8063685894012451, "epoch": 6.36, "step": 298499 }, { "epoch": 6.36, "learning_rate": 4.114816326530612e-05, "loss": 1.9478, "step": 298500 }, { "FLOPS loss": 0.09138891100883484, "L0_d": 789.53, "MLM loss": 1.8201489448547363, "epoch": 6.37, "step": 298999 }, { "epoch": 6.37, "learning_rate": 4.10461224489796e-05, "loss": 1.9477, "step": 299000 }, { "FLOPS loss": 0.07261373102664948, "L0_d": 794.72, "MLM loss": 1.878649353981018, "epoch": 6.38, "step": 299499 }, { "epoch": 6.38, "learning_rate": 4.0944081632653063e-05, "loss": 1.9458, "step": 299500 }, { "FLOPS loss": 0.08437049388885498, "L0_d": 1008.7, "MLM loss": 1.8229589462280273, "epoch": 6.39, "step": 299999 }, { "epoch": 6.39, "learning_rate": 4.0842040816326535e-05, "loss": 1.9434, "step": 300000 }, { "FLOPS loss": 0.08365792036056519, "L0_d": 657.16, "MLM loss": 1.788762092590332, "epoch": 6.4, "step": 300499 }, { "epoch": 6.4, "learning_rate": 4.0740204081632656e-05, "loss": 1.9439, "step": 300500 }, { "FLOPS loss": 0.07193128764629364, "L0_d": 711.5, "MLM loss": 1.8217332363128662, "epoch": 6.41, "step": 300999 }, { "epoch": 6.41, "learning_rate": 4.063816326530612e-05, "loss": 1.946, "step": 301000 }, { "FLOPS loss": 0.09581154584884644, "L0_d": 1142.42, "MLM loss": 1.8188657760620117, "epoch": 6.42, "step": 301499 }, { "epoch": 6.42, "learning_rate": 4.05361224489796e-05, "loss": 1.945, "step": 301500 }, { "FLOPS loss": 0.08122092485427856, "L0_d": 975.69, "MLM loss": 1.894965648651123, "epoch": 6.43, "step": 301999 }, { "epoch": 6.43, "learning_rate": 4.043408163265306e-05, "loss": 1.9434, "step": 302000 }, { "FLOPS loss": 0.061854831874370575, "L0_d": 463.28, "MLM loss": 1.888474702835083, "epoch": 6.44, "step": 302499 }, { "epoch": 6.44, "learning_rate": 4.0332040816326535e-05, "loss": 1.9453, "step": 302500 }, { "FLOPS loss": 0.08056728541851044, "L0_d": 863.88, "MLM loss": 1.8775725364685059, "epoch": 6.45, "step": 302999 }, { "epoch": 6.45, "learning_rate": 4.0230204081632655e-05, "loss": 1.9457, "step": 303000 }, { "FLOPS loss": 0.07168541103601456, "L0_d": 606.09, "MLM loss": 1.7786054611206055, "epoch": 6.46, "step": 303499 }, { "epoch": 6.46, "learning_rate": 4.012816326530613e-05, "loss": 1.9436, "step": 303500 }, { "FLOPS loss": 0.08951392024755478, "L0_d": 1066.7, "MLM loss": 1.9425861835479736, "epoch": 6.47, "step": 303999 }, { "epoch": 6.47, "learning_rate": 4.002612244897959e-05, "loss": 1.9439, "step": 304000 }, { "FLOPS loss": 0.09145156294107437, "L0_d": 692.55, "MLM loss": 2.026456832885742, "epoch": 6.48, "step": 304499 }, { "epoch": 6.48, "learning_rate": 3.992408163265306e-05, "loss": 1.9466, "step": 304500 }, { "FLOPS loss": 0.06714989244937897, "L0_d": 774.73, "MLM loss": 1.8669438362121582, "epoch": 6.5, "step": 304999 }, { "epoch": 6.5, "learning_rate": 3.9822040816326534e-05, "loss": 1.9479, "step": 305000 }, { "FLOPS loss": 0.06919477134943008, "L0_d": 791.53, "MLM loss": 1.8139466047286987, "epoch": 6.51, "step": 305499 }, { "epoch": 6.51, "learning_rate": 3.9720204081632655e-05, "loss": 1.9475, "step": 305500 }, { "FLOPS loss": 0.08101382851600647, "L0_d": 806.67, "MLM loss": 1.8381314277648926, "epoch": 6.52, "step": 305999 }, { "epoch": 6.52, "learning_rate": 3.961816326530613e-05, "loss": 1.9405, "step": 306000 }, { "FLOPS loss": 0.07709395885467529, "L0_d": 754.17, "MLM loss": 1.8039436340332031, "epoch": 6.53, "step": 306499 }, { "epoch": 6.53, "learning_rate": 3.951612244897959e-05, "loss": 1.941, "step": 306500 }, { "FLOPS loss": 0.07149617373943329, "L0_d": 715.22, "MLM loss": 1.7575922012329102, "epoch": 6.54, "step": 306999 }, { "epoch": 6.54, "learning_rate": 3.941408163265306e-05, "loss": 1.9399, "step": 307000 }, { "FLOPS loss": 0.09008293598890305, "L0_d": 860.47, "MLM loss": 1.9457957744598389, "epoch": 6.55, "step": 307499 }, { "epoch": 6.55, "learning_rate": 3.9312244897959184e-05, "loss": 1.9408, "step": 307500 }, { "FLOPS loss": 0.06867194920778275, "L0_d": 567.94, "MLM loss": 1.8648253679275513, "epoch": 6.56, "step": 307999 }, { "epoch": 6.56, "learning_rate": 3.9210204081632655e-05, "loss": 1.9429, "step": 308000 }, { "FLOPS loss": 0.07760652899742126, "L0_d": 842.48, "MLM loss": 1.827282428741455, "epoch": 6.57, "step": 308499 }, { "epoch": 6.57, "learning_rate": 3.9108163265306126e-05, "loss": 1.9409, "step": 308500 }, { "FLOPS loss": 0.06471911817789078, "L0_d": 742.31, "MLM loss": 2.1198086738586426, "epoch": 6.58, "step": 308999 }, { "epoch": 6.58, "learning_rate": 3.900612244897959e-05, "loss": 1.941, "step": 309000 }, { "FLOPS loss": 0.07304774969816208, "L0_d": 655.27, "MLM loss": 1.845359444618225, "epoch": 6.59, "step": 309499 }, { "epoch": 6.59, "learning_rate": 3.890428571428572e-05, "loss": 1.9416, "step": 309500 }, { "FLOPS loss": 0.08457273244857788, "L0_d": 622.2, "MLM loss": 2.051377534866333, "epoch": 6.6, "step": 309999 }, { "epoch": 6.6, "learning_rate": 3.880224489795918e-05, "loss": 1.9402, "step": 310000 }, { "FLOPS loss": 0.06825979799032211, "L0_d": 514.53, "MLM loss": 1.8188495635986328, "epoch": 6.61, "step": 310499 }, { "epoch": 6.61, "learning_rate": 3.8700204081632655e-05, "loss": 1.9433, "step": 310500 }, { "FLOPS loss": 0.08188638836145401, "L0_d": 932.84, "MLM loss": 1.9714046716690063, "epoch": 6.62, "step": 310999 }, { "epoch": 6.62, "learning_rate": 3.8598163265306126e-05, "loss": 1.9389, "step": 311000 }, { "FLOPS loss": 0.07594864070415497, "L0_d": 988.8, "MLM loss": 1.8642597198486328, "epoch": 6.63, "step": 311499 }, { "epoch": 6.63, "learning_rate": 3.849612244897959e-05, "loss": 1.9414, "step": 311500 }, { "FLOPS loss": 0.07680333405733109, "L0_d": 648.48, "MLM loss": 1.8027818202972412, "epoch": 6.64, "step": 311999 }, { "epoch": 6.64, "learning_rate": 3.839408163265306e-05, "loss": 1.94, "step": 312000 }, { "FLOPS loss": 0.0731196478009224, "L0_d": 701.98, "MLM loss": 1.8166855573654175, "epoch": 6.65, "step": 312499 }, { "epoch": 6.65, "learning_rate": 3.829224489795918e-05, "loss": 1.9385, "step": 312500 }, { "FLOPS loss": 0.08190134912729263, "L0_d": 796.22, "MLM loss": 2.0480880737304688, "epoch": 6.67, "step": 312999 }, { "epoch": 6.67, "learning_rate": 3.8190204081632655e-05, "loss": 1.9408, "step": 313000 }, { "FLOPS loss": 0.07585899531841278, "L0_d": 712.28, "MLM loss": 1.9030694961547852, "epoch": 6.68, "step": 313499 }, { "epoch": 6.68, "learning_rate": 3.8088163265306126e-05, "loss": 1.9398, "step": 313500 }, { "FLOPS loss": 0.0705098956823349, "L0_d": 658.12, "MLM loss": 1.8781006336212158, "epoch": 6.69, "step": 313999 }, { "epoch": 6.69, "learning_rate": 3.798632653061225e-05, "loss": 1.9411, "step": 314000 }, { "FLOPS loss": 0.0811757817864418, "L0_d": 609.66, "MLM loss": 1.8772015571594238, "epoch": 6.7, "step": 314499 }, { "epoch": 6.7, "learning_rate": 3.788428571428572e-05, "loss": 1.9359, "step": 314500 }, { "FLOPS loss": 0.06603971868753433, "L0_d": 583.78, "MLM loss": 1.8940114974975586, "epoch": 6.71, "step": 314999 }, { "epoch": 6.71, "learning_rate": 3.778224489795918e-05, "loss": 1.9385, "step": 315000 }, { "FLOPS loss": 0.07398807257413864, "L0_d": 744.42, "MLM loss": 1.8569490909576416, "epoch": 6.72, "step": 315499 }, { "epoch": 6.72, "learning_rate": 3.7680204081632654e-05, "loss": 1.9415, "step": 315500 }, { "FLOPS loss": 0.06338933110237122, "L0_d": 631.48, "MLM loss": 1.8887548446655273, "epoch": 6.73, "step": 315999 }, { "epoch": 6.73, "learning_rate": 3.7578163265306126e-05, "loss": 1.9367, "step": 316000 }, { "FLOPS loss": 0.06369595974683762, "L0_d": 808.41, "MLM loss": 1.889241337776184, "epoch": 6.74, "step": 316499 }, { "epoch": 6.74, "learning_rate": 3.74761224489796e-05, "loss": 1.9429, "step": 316500 }, { "FLOPS loss": 0.06813719868659973, "L0_d": 613.62, "MLM loss": 1.9069278240203857, "epoch": 6.75, "step": 316999 }, { "epoch": 6.75, "learning_rate": 3.737408163265306e-05, "loss": 1.9385, "step": 317000 }, { "FLOPS loss": 0.07158663868904114, "L0_d": 745.84, "MLM loss": 1.7616331577301025, "epoch": 6.76, "step": 317499 }, { "epoch": 6.76, "learning_rate": 3.7272040816326533e-05, "loss": 1.9356, "step": 317500 }, { "FLOPS loss": 0.09117750078439713, "L0_d": 624.55, "MLM loss": 1.9352805614471436, "epoch": 6.77, "step": 317999 }, { "epoch": 6.77, "learning_rate": 3.7170204081632654e-05, "loss": 1.9347, "step": 318000 }, { "FLOPS loss": 0.06324110925197601, "L0_d": 602.06, "MLM loss": 1.833200454711914, "epoch": 6.78, "step": 318499 }, { "epoch": 6.78, "learning_rate": 3.706816326530612e-05, "loss": 1.9401, "step": 318500 }, { "FLOPS loss": 0.08155985921621323, "L0_d": 966.45, "MLM loss": 1.7821117639541626, "epoch": 6.79, "step": 318999 }, { "epoch": 6.79, "learning_rate": 3.69661224489796e-05, "loss": 1.9365, "step": 319000 }, { "FLOPS loss": 0.06751174479722977, "L0_d": 627.66, "MLM loss": 1.6768591403961182, "epoch": 6.8, "step": 319499 }, { "epoch": 6.8, "learning_rate": 3.686408163265306e-05, "loss": 1.932, "step": 319500 }, { "FLOPS loss": 0.08122416585683823, "L0_d": 705.98, "MLM loss": 1.8377563953399658, "epoch": 6.81, "step": 319999 }, { "epoch": 6.81, "learning_rate": 3.676204081632653e-05, "loss": 1.9361, "step": 320000 }, { "FLOPS loss": 0.06721948832273483, "L0_d": 708.16, "MLM loss": 1.8212740421295166, "epoch": 6.83, "step": 320499 }, { "epoch": 6.83, "learning_rate": 3.6660204081632654e-05, "loss": 1.9352, "step": 320500 }, { "FLOPS loss": 0.09321010857820511, "L0_d": 1287.06, "MLM loss": 1.8241631984710693, "epoch": 6.84, "step": 320999 }, { "epoch": 6.84, "learning_rate": 3.6558163265306125e-05, "loss": 1.9323, "step": 321000 }, { "FLOPS loss": 0.06190552935004234, "L0_d": 528.55, "MLM loss": 1.9443321228027344, "epoch": 6.85, "step": 321499 }, { "epoch": 6.85, "learning_rate": 3.64561224489796e-05, "loss": 1.936, "step": 321500 }, { "FLOPS loss": 0.061153117567300797, "L0_d": 520.72, "MLM loss": 1.832186222076416, "epoch": 6.86, "step": 321999 }, { "epoch": 6.86, "learning_rate": 3.635408163265306e-05, "loss": 1.9342, "step": 322000 }, { "FLOPS loss": 0.09563008695840836, "L0_d": 753.09, "MLM loss": 1.8728309869766235, "epoch": 6.87, "step": 322499 }, { "epoch": 6.87, "learning_rate": 3.625224489795919e-05, "loss": 1.9354, "step": 322500 }, { "FLOPS loss": 0.07957378774881363, "L0_d": 795.86, "MLM loss": 1.8879680633544922, "epoch": 6.88, "step": 322999 }, { "epoch": 6.88, "learning_rate": 3.6150204081632654e-05, "loss": 1.9349, "step": 323000 }, { "FLOPS loss": 0.06421475112438202, "L0_d": 706.42, "MLM loss": 1.9722673892974854, "epoch": 6.89, "step": 323499 }, { "epoch": 6.89, "learning_rate": 3.6048163265306125e-05, "loss": 1.936, "step": 323500 }, { "FLOPS loss": 0.0862734466791153, "L0_d": 833.16, "MLM loss": 1.8255963325500488, "epoch": 6.9, "step": 323999 }, { "epoch": 6.9, "learning_rate": 3.594612244897959e-05, "loss": 1.934, "step": 324000 }, { "FLOPS loss": 0.06920310854911804, "L0_d": 725.91, "MLM loss": 1.8486316204071045, "epoch": 6.91, "step": 324499 }, { "epoch": 6.91, "learning_rate": 3.584408163265307e-05, "loss": 1.9328, "step": 324500 }, { "FLOPS loss": 0.07331109046936035, "L0_d": 611.11, "MLM loss": 1.7338364124298096, "epoch": 6.92, "step": 324999 }, { "epoch": 6.92, "learning_rate": 3.574224489795919e-05, "loss": 1.9357, "step": 325000 }, { "FLOPS loss": 0.06642386317253113, "L0_d": 547.34, "MLM loss": 1.8029348850250244, "epoch": 6.93, "step": 325499 }, { "epoch": 6.93, "learning_rate": 3.5640204081632654e-05, "loss": 1.9292, "step": 325500 }, { "FLOPS loss": 0.06935901939868927, "L0_d": 794.67, "MLM loss": 2.0707836151123047, "epoch": 6.94, "step": 325999 }, { "epoch": 6.94, "learning_rate": 3.5538163265306125e-05, "loss": 1.9316, "step": 326000 }, { "FLOPS loss": 0.0792873278260231, "L0_d": 834.64, "MLM loss": 1.8418869972229004, "epoch": 6.95, "step": 326499 }, { "epoch": 6.95, "learning_rate": 3.543612244897959e-05, "loss": 1.9348, "step": 326500 }, { "FLOPS loss": 0.07487780600786209, "L0_d": 580.11, "MLM loss": 1.8635060787200928, "epoch": 6.96, "step": 326999 }, { "epoch": 6.96, "learning_rate": 3.533428571428572e-05, "loss": 1.9305, "step": 327000 }, { "FLOPS loss": 0.08103643357753754, "L0_d": 751.62, "MLM loss": 1.7533881664276123, "epoch": 6.97, "step": 327499 }, { "epoch": 6.97, "learning_rate": 3.523224489795919e-05, "loss": 1.9311, "step": 327500 }, { "FLOPS loss": 0.07984720915555954, "L0_d": 857.66, "MLM loss": 1.7085926532745361, "epoch": 6.98, "step": 327999 }, { "epoch": 6.98, "learning_rate": 3.513020408163265e-05, "loss": 1.9311, "step": 328000 }, { "FLOPS loss": 0.09680966287851334, "L0_d": 1293.66, "MLM loss": 1.8928158283233643, "epoch": 7.0, "step": 328499 }, { "epoch": 7.0, "learning_rate": 3.5028163265306125e-05, "loss": 1.9302, "step": 328500 }, { "FLOPS loss": 0.05976860970258713, "L0_d": 521.03, "MLM loss": 1.7658557891845703, "epoch": 7.01, "step": 328999 }, { "epoch": 7.01, "learning_rate": 3.4926326530612246e-05, "loss": 1.9286, "step": 329000 }, { "FLOPS loss": 0.05823574215173721, "L0_d": 734.66, "MLM loss": 1.8498952388763428, "epoch": 7.02, "step": 329499 }, { "epoch": 7.02, "learning_rate": 3.482428571428572e-05, "loss": 1.9281, "step": 329500 }, { "FLOPS loss": 0.09669086337089539, "L0_d": 855.12, "MLM loss": 1.881850242614746, "epoch": 7.03, "step": 329999 }, { "epoch": 7.03, "learning_rate": 3.472224489795918e-05, "loss": 1.9277, "step": 330000 }, { "FLOPS loss": 0.08522310107946396, "L0_d": 818.73, "MLM loss": 1.786057710647583, "epoch": 7.04, "step": 330499 }, { "epoch": 7.04, "learning_rate": 3.462020408163266e-05, "loss": 1.9314, "step": 330500 }, { "FLOPS loss": 0.06717081367969513, "L0_d": 763.72, "MLM loss": 1.8741955757141113, "epoch": 7.05, "step": 330999 }, { "epoch": 7.05, "learning_rate": 3.4518163265306125e-05, "loss": 1.924, "step": 331000 }, { "FLOPS loss": 0.07524064928293228, "L0_d": 709.78, "MLM loss": 1.97847318649292, "epoch": 7.06, "step": 331499 }, { "epoch": 7.06, "learning_rate": 3.4416326530612245e-05, "loss": 1.9289, "step": 331500 }, { "FLOPS loss": 0.06118454411625862, "L0_d": 820.77, "MLM loss": 1.9433472156524658, "epoch": 7.07, "step": 331999 }, { "epoch": 7.07, "learning_rate": 3.431428571428572e-05, "loss": 1.9319, "step": 332000 }, { "FLOPS loss": 0.07941633462905884, "L0_d": 688.64, "MLM loss": 1.976243495941162, "epoch": 7.08, "step": 332499 }, { "epoch": 7.08, "learning_rate": 3.421224489795918e-05, "loss": 1.9272, "step": 332500 }, { "FLOPS loss": 0.07291039079427719, "L0_d": 806.14, "MLM loss": 1.8062152862548828, "epoch": 7.09, "step": 332999 }, { "epoch": 7.09, "learning_rate": 3.411020408163266e-05, "loss": 1.9243, "step": 333000 }, { "FLOPS loss": 0.07909004390239716, "L0_d": 696.17, "MLM loss": 1.8287863731384277, "epoch": 7.1, "step": 333499 }, { "epoch": 7.1, "learning_rate": 3.400836734693878e-05, "loss": 1.9272, "step": 333500 }, { "FLOPS loss": 0.0881006196141243, "L0_d": 717.56, "MLM loss": 1.8593976497650146, "epoch": 7.11, "step": 333999 }, { "epoch": 7.11, "learning_rate": 3.3906326530612245e-05, "loss": 1.9301, "step": 334000 }, { "FLOPS loss": 0.07782454788684845, "L0_d": 1056.52, "MLM loss": 1.8584022521972656, "epoch": 7.12, "step": 334499 }, { "epoch": 7.12, "learning_rate": 3.380428571428572e-05, "loss": 1.9287, "step": 334500 }, { "FLOPS loss": 0.09458693116903305, "L0_d": 1192.09, "MLM loss": 1.8450138568878174, "epoch": 7.13, "step": 334999 }, { "epoch": 7.13, "learning_rate": 3.370224489795918e-05, "loss": 1.9261, "step": 335000 }, { "FLOPS loss": 0.0697149708867073, "L0_d": 766.3, "MLM loss": 1.952140212059021, "epoch": 7.14, "step": 335499 }, { "epoch": 7.14, "learning_rate": 3.360020408163265e-05, "loss": 1.9257, "step": 335500 }, { "FLOPS loss": 0.06831005960702896, "L0_d": 805.2, "MLM loss": 1.7946230173110962, "epoch": 7.16, "step": 335999 }, { "epoch": 7.16, "learning_rate": 3.3498367346938773e-05, "loss": 1.9256, "step": 336000 }, { "FLOPS loss": 0.09496098756790161, "L0_d": 825.23, "MLM loss": 1.841353416442871, "epoch": 7.17, "step": 336499 }, { "epoch": 7.17, "learning_rate": 3.339632653061225e-05, "loss": 1.9287, "step": 336500 }, { "FLOPS loss": 0.07120657712221146, "L0_d": 649.5, "MLM loss": 1.8039329051971436, "epoch": 7.18, "step": 336999 }, { "epoch": 7.18, "learning_rate": 3.3294285714285716e-05, "loss": 1.9261, "step": 337000 }, { "FLOPS loss": 0.06209271773695946, "L0_d": 679.39, "MLM loss": 1.8043580055236816, "epoch": 7.19, "step": 337499 }, { "epoch": 7.19, "learning_rate": 3.319224489795919e-05, "loss": 1.9278, "step": 337500 }, { "FLOPS loss": 0.07758375257253647, "L0_d": 891.48, "MLM loss": 1.850175142288208, "epoch": 7.2, "step": 337999 }, { "epoch": 7.2, "learning_rate": 3.309020408163265e-05, "loss": 1.9252, "step": 338000 }, { "FLOPS loss": 0.07936911284923553, "L0_d": 770.59, "MLM loss": 1.8158717155456543, "epoch": 7.21, "step": 338499 }, { "epoch": 7.21, "learning_rate": 3.2988163265306124e-05, "loss": 1.926, "step": 338500 }, { "FLOPS loss": 0.06472300738096237, "L0_d": 691.17, "MLM loss": 1.9313558340072632, "epoch": 7.22, "step": 338999 }, { "epoch": 7.22, "learning_rate": 3.288632653061225e-05, "loss": 1.9242, "step": 339000 }, { "FLOPS loss": 0.08942249417304993, "L0_d": 774.66, "MLM loss": 1.9662246704101562, "epoch": 7.23, "step": 339499 }, { "epoch": 7.23, "learning_rate": 3.2784285714285716e-05, "loss": 1.9267, "step": 339500 }, { "FLOPS loss": 0.0655161663889885, "L0_d": 474.06, "MLM loss": 1.895153522491455, "epoch": 7.24, "step": 339999 }, { "epoch": 7.24, "learning_rate": 3.268224489795919e-05, "loss": 1.9222, "step": 340000 }, { "FLOPS loss": 0.0758613795042038, "L0_d": 719.14, "MLM loss": 1.8445680141448975, "epoch": 7.25, "step": 340499 }, { "epoch": 7.25, "learning_rate": 3.258020408163265e-05, "loss": 1.9217, "step": 340500 }, { "FLOPS loss": 0.0741974487900734, "L0_d": 599.89, "MLM loss": 1.9803168773651123, "epoch": 7.26, "step": 340999 }, { "epoch": 7.26, "learning_rate": 3.2478163265306124e-05, "loss": 1.9259, "step": 341000 }, { "FLOPS loss": 0.072549968957901, "L0_d": 668.38, "MLM loss": 1.759738802909851, "epoch": 7.27, "step": 341499 }, { "epoch": 7.27, "learning_rate": 3.2376326530612245e-05, "loss": 1.9204, "step": 341500 }, { "FLOPS loss": 0.07621914148330688, "L0_d": 1228.61, "MLM loss": 1.8269237279891968, "epoch": 7.28, "step": 341999 }, { "epoch": 7.28, "learning_rate": 3.2274285714285716e-05, "loss": 1.9188, "step": 342000 }, { "FLOPS loss": 0.07951581478118896, "L0_d": 692.34, "MLM loss": 1.728185772895813, "epoch": 7.29, "step": 342499 }, { "epoch": 7.29, "learning_rate": 3.217224489795919e-05, "loss": 1.9277, "step": 342500 }, { "FLOPS loss": 0.06861908733844757, "L0_d": 1225.88, "MLM loss": 1.8209030628204346, "epoch": 7.3, "step": 342999 }, { "epoch": 7.3, "learning_rate": 3.207020408163265e-05, "loss": 1.9211, "step": 343000 }, { "FLOPS loss": 0.08117002993822098, "L0_d": 726.34, "MLM loss": 1.8451335430145264, "epoch": 7.32, "step": 343499 }, { "epoch": 7.32, "learning_rate": 3.196836734693878e-05, "loss": 1.9229, "step": 343500 }, { "FLOPS loss": 0.0719624012708664, "L0_d": 743.8, "MLM loss": 1.6969846487045288, "epoch": 7.33, "step": 343999 }, { "epoch": 7.33, "learning_rate": 3.1866326530612244e-05, "loss": 1.9244, "step": 344000 }, { "FLOPS loss": 0.07608190178871155, "L0_d": 922.83, "MLM loss": 1.9612715244293213, "epoch": 7.34, "step": 344499 }, { "epoch": 7.34, "learning_rate": 3.1764285714285716e-05, "loss": 1.923, "step": 344500 }, { "FLOPS loss": 0.07321670651435852, "L0_d": 853.64, "MLM loss": 1.928910732269287, "epoch": 7.35, "step": 344999 }, { "epoch": 7.35, "learning_rate": 3.166224489795919e-05, "loss": 1.9226, "step": 345000 }, { "FLOPS loss": 0.07549431920051575, "L0_d": 863.84, "MLM loss": 1.8605866432189941, "epoch": 7.36, "step": 345499 }, { "epoch": 7.36, "learning_rate": 3.156020408163265e-05, "loss": 1.9229, "step": 345500 }, { "FLOPS loss": 0.0844147577881813, "L0_d": 718.67, "MLM loss": 1.7773514986038208, "epoch": 7.37, "step": 345999 }, { "epoch": 7.37, "learning_rate": 3.1458163265306123e-05, "loss": 1.9191, "step": 346000 }, { "FLOPS loss": 0.08949605375528336, "L0_d": 1317.33, "MLM loss": 1.8800100088119507, "epoch": 7.38, "step": 346499 }, { "epoch": 7.38, "learning_rate": 3.1356326530612244e-05, "loss": 1.9245, "step": 346500 }, { "FLOPS loss": 0.060002170503139496, "L0_d": 679.66, "MLM loss": 1.8119897842407227, "epoch": 7.39, "step": 346999 }, { "epoch": 7.39, "learning_rate": 3.1254285714285716e-05, "loss": 1.9225, "step": 347000 }, { "FLOPS loss": 0.0773637667298317, "L0_d": 792.98, "MLM loss": 1.8959424495697021, "epoch": 7.4, "step": 347499 }, { "epoch": 7.4, "learning_rate": 3.115224489795919e-05, "loss": 1.9232, "step": 347500 }, { "FLOPS loss": 0.06032504886388779, "L0_d": 722.84, "MLM loss": 1.9673316478729248, "epoch": 7.41, "step": 347999 }, { "epoch": 7.41, "learning_rate": 3.105020408163266e-05, "loss": 1.922, "step": 348000 }, { "FLOPS loss": 0.071210578083992, "L0_d": 612.83, "MLM loss": 1.831152319908142, "epoch": 7.42, "step": 348499 }, { "epoch": 7.42, "learning_rate": 3.094836734693878e-05, "loss": 1.9212, "step": 348500 }, { "FLOPS loss": 0.07164246588945389, "L0_d": 795.75, "MLM loss": 1.7671644687652588, "epoch": 7.43, "step": 348999 }, { "epoch": 7.43, "learning_rate": 3.0846326530612244e-05, "loss": 1.923, "step": 349000 }, { "FLOPS loss": 0.06108471378684044, "L0_d": 598.73, "MLM loss": 1.7506718635559082, "epoch": 7.44, "step": 349499 }, { "epoch": 7.44, "learning_rate": 3.0744285714285715e-05, "loss": 1.9199, "step": 349500 }, { "FLOPS loss": 0.06783506274223328, "L0_d": 765.02, "MLM loss": 1.7566602230072021, "epoch": 7.45, "step": 349999 }, { "epoch": 7.45, "learning_rate": 3.064224489795918e-05, "loss": 1.923, "step": 350000 }, { "FLOPS loss": 0.06063126027584076, "L0_d": 623.61, "MLM loss": 1.923569679260254, "epoch": 7.46, "step": 350499 }, { "epoch": 7.46, "learning_rate": 3.054040816326531e-05, "loss": 1.923, "step": 350500 }, { "FLOPS loss": 0.08617284893989563, "L0_d": 786.66, "MLM loss": 1.870776653289795, "epoch": 7.47, "step": 350999 }, { "epoch": 7.47, "learning_rate": 3.043836734693878e-05, "loss": 1.9196, "step": 351000 }, { "FLOPS loss": 0.0912565290927887, "L0_d": 1055.14, "MLM loss": 1.824958086013794, "epoch": 7.49, "step": 351499 }, { "epoch": 7.49, "learning_rate": 3.0336326530612247e-05, "loss": 1.9217, "step": 351500 }, { "FLOPS loss": 0.07441375404596329, "L0_d": 757.95, "MLM loss": 1.9523437023162842, "epoch": 7.5, "step": 351999 }, { "epoch": 7.5, "learning_rate": 3.0234285714285715e-05, "loss": 1.9204, "step": 352000 }, { "FLOPS loss": 0.06910394132137299, "L0_d": 647.09, "MLM loss": 1.7434977293014526, "epoch": 7.51, "step": 352499 }, { "epoch": 7.51, "learning_rate": 3.0132448979591836e-05, "loss": 1.9194, "step": 352500 }, { "FLOPS loss": 0.07549700140953064, "L0_d": 835.09, "MLM loss": 1.9095675945281982, "epoch": 7.52, "step": 352999 }, { "epoch": 7.52, "learning_rate": 3.0030408163265304e-05, "loss": 1.9195, "step": 353000 }, { "FLOPS loss": 0.06734315305948257, "L0_d": 722.47, "MLM loss": 1.8535124063491821, "epoch": 7.53, "step": 353499 }, { "epoch": 7.53, "learning_rate": 2.992836734693878e-05, "loss": 1.9224, "step": 353500 }, { "FLOPS loss": 0.0765361413359642, "L0_d": 696.53, "MLM loss": 1.7221925258636475, "epoch": 7.54, "step": 353999 }, { "epoch": 7.54, "learning_rate": 2.9826326530612247e-05, "loss": 1.9176, "step": 354000 }, { "FLOPS loss": 0.06397148221731186, "L0_d": 667.94, "MLM loss": 1.8591244220733643, "epoch": 7.55, "step": 354499 }, { "epoch": 7.55, "learning_rate": 2.9724285714285715e-05, "loss": 1.9174, "step": 354500 }, { "FLOPS loss": 0.08621693402528763, "L0_d": 1023.33, "MLM loss": 1.831404209136963, "epoch": 7.56, "step": 354999 }, { "epoch": 7.56, "learning_rate": 2.962244897959184e-05, "loss": 1.9169, "step": 355000 }, { "FLOPS loss": 0.06905224174261093, "L0_d": 788.47, "MLM loss": 1.9023628234863281, "epoch": 7.57, "step": 355499 }, { "epoch": 7.57, "learning_rate": 2.9520408163265307e-05, "loss": 1.9171, "step": 355500 }, { "FLOPS loss": 0.07182815670967102, "L0_d": 605.72, "MLM loss": 1.8447587490081787, "epoch": 7.58, "step": 355999 }, { "epoch": 7.58, "learning_rate": 2.9418367346938775e-05, "loss": 1.9181, "step": 356000 }, { "FLOPS loss": 0.06056385487318039, "L0_d": 805.12, "MLM loss": 1.8549630641937256, "epoch": 7.59, "step": 356499 }, { "epoch": 7.59, "learning_rate": 2.9316326530612247e-05, "loss": 1.9198, "step": 356500 }, { "FLOPS loss": 0.07179979234933853, "L0_d": 687.52, "MLM loss": 1.7600245475769043, "epoch": 7.6, "step": 356999 }, { "epoch": 7.6, "learning_rate": 2.921448979591837e-05, "loss": 1.9159, "step": 357000 }, { "FLOPS loss": 0.06811942905187607, "L0_d": 618.5, "MLM loss": 1.9086711406707764, "epoch": 7.61, "step": 357499 }, { "epoch": 7.61, "learning_rate": 2.911244897959184e-05, "loss": 1.919, "step": 357500 }, { "FLOPS loss": 0.09158661961555481, "L0_d": 780.5, "MLM loss": 1.7335734367370605, "epoch": 7.62, "step": 357999 }, { "epoch": 7.62, "learning_rate": 2.9010408163265307e-05, "loss": 1.9148, "step": 358000 }, { "FLOPS loss": 0.0640568807721138, "L0_d": 632.23, "MLM loss": 1.8534142971038818, "epoch": 7.63, "step": 358499 }, { "epoch": 7.63, "learning_rate": 2.8908367346938775e-05, "loss": 1.9175, "step": 358500 }, { "FLOPS loss": 0.06531016528606415, "L0_d": 839.8, "MLM loss": 1.8377444744110107, "epoch": 7.65, "step": 358999 }, { "epoch": 7.65, "learning_rate": 2.880632653061225e-05, "loss": 1.9144, "step": 359000 }, { "FLOPS loss": 0.08689986169338226, "L0_d": 864.11, "MLM loss": 1.752164363861084, "epoch": 7.66, "step": 359499 }, { "epoch": 7.66, "learning_rate": 2.870448979591837e-05, "loss": 1.9173, "step": 359500 }, { "FLOPS loss": 0.08605177700519562, "L0_d": 841.92, "MLM loss": 2.0353498458862305, "epoch": 7.67, "step": 359999 }, { "epoch": 7.67, "learning_rate": 2.860244897959184e-05, "loss": 1.915, "step": 360000 }, { "FLOPS loss": 0.08223269879817963, "L0_d": 663.62, "MLM loss": 1.9616049528121948, "epoch": 7.68, "step": 360499 }, { "epoch": 7.68, "learning_rate": 2.8500408163265307e-05, "loss": 1.9187, "step": 360500 }, { "FLOPS loss": 0.06230027228593826, "L0_d": 623.67, "MLM loss": 1.8606956005096436, "epoch": 7.69, "step": 360999 }, { "epoch": 7.69, "learning_rate": 2.8398367346938775e-05, "loss": 1.9192, "step": 361000 }, { "FLOPS loss": 0.08796975761651993, "L0_d": 961.97, "MLM loss": 1.7985609769821167, "epoch": 7.7, "step": 361499 }, { "epoch": 7.7, "learning_rate": 2.82965306122449e-05, "loss": 1.9193, "step": 361500 }, { "FLOPS loss": 0.07598540186882019, "L0_d": 680.53, "MLM loss": 1.9214376211166382, "epoch": 7.71, "step": 361999 }, { "epoch": 7.71, "learning_rate": 2.8194489795918367e-05, "loss": 1.9163, "step": 362000 }, { "FLOPS loss": 0.07575920224189758, "L0_d": 729.17, "MLM loss": 1.9614065885543823, "epoch": 7.72, "step": 362499 }, { "epoch": 7.72, "learning_rate": 2.809244897959184e-05, "loss": 1.9169, "step": 362500 }, { "FLOPS loss": 0.07568804174661636, "L0_d": 612.11, "MLM loss": 1.794695258140564, "epoch": 7.73, "step": 362999 }, { "epoch": 7.73, "learning_rate": 2.7990408163265307e-05, "loss": 1.9132, "step": 363000 }, { "FLOPS loss": 0.08963464200496674, "L0_d": 716.73, "MLM loss": 1.7673532962799072, "epoch": 7.74, "step": 363499 }, { "epoch": 7.74, "learning_rate": 2.788857142857143e-05, "loss": 1.914, "step": 363500 }, { "FLOPS loss": 0.07700413465499878, "L0_d": 543.23, "MLM loss": 1.705705165863037, "epoch": 7.75, "step": 363999 }, { "epoch": 7.75, "learning_rate": 2.77865306122449e-05, "loss": 1.9145, "step": 364000 }, { "FLOPS loss": 0.07794135063886642, "L0_d": 697.8, "MLM loss": 1.9613754749298096, "epoch": 7.76, "step": 364499 }, { "epoch": 7.76, "learning_rate": 2.7684489795918367e-05, "loss": 1.9186, "step": 364500 }, { "FLOPS loss": 0.07350470125675201, "L0_d": 785.53, "MLM loss": 1.8970494270324707, "epoch": 7.77, "step": 364999 }, { "epoch": 7.77, "learning_rate": 2.7582448979591842e-05, "loss": 1.9149, "step": 365000 }, { "FLOPS loss": 0.07593075931072235, "L0_d": 1040.44, "MLM loss": 1.8233039379119873, "epoch": 7.78, "step": 365499 }, { "epoch": 7.78, "learning_rate": 2.7480612244897962e-05, "loss": 1.9167, "step": 365500 }, { "FLOPS loss": 0.07376305758953094, "L0_d": 756.62, "MLM loss": 1.7765004634857178, "epoch": 7.79, "step": 365999 }, { "epoch": 7.79, "learning_rate": 2.737857142857143e-05, "loss": 1.9157, "step": 366000 }, { "FLOPS loss": 0.07158848643302917, "L0_d": 556.3, "MLM loss": 1.8171870708465576, "epoch": 7.8, "step": 366499 }, { "epoch": 7.8, "learning_rate": 2.72765306122449e-05, "loss": 1.9129, "step": 366500 }, { "FLOPS loss": 0.06732846796512604, "L0_d": 818.38, "MLM loss": 1.8742527961730957, "epoch": 7.82, "step": 366999 }, { "epoch": 7.82, "learning_rate": 2.7174489795918367e-05, "loss": 1.9129, "step": 367000 }, { "FLOPS loss": 0.06487272679805756, "L0_d": 669.91, "MLM loss": 1.7070577144622803, "epoch": 7.83, "step": 367499 }, { "epoch": 7.83, "learning_rate": 2.7072448979591835e-05, "loss": 1.9126, "step": 367500 }, { "FLOPS loss": 0.08324155956506729, "L0_d": 732.16, "MLM loss": 1.7398138046264648, "epoch": 7.84, "step": 367999 }, { "epoch": 7.84, "learning_rate": 2.697061224489796e-05, "loss": 1.9131, "step": 368000 }, { "FLOPS loss": 0.0730772539973259, "L0_d": 628.33, "MLM loss": 1.8350969552993774, "epoch": 7.85, "step": 368499 }, { "epoch": 7.85, "learning_rate": 2.686857142857143e-05, "loss": 1.913, "step": 368500 }, { "FLOPS loss": 0.08549657464027405, "L0_d": 948.14, "MLM loss": 1.8315303325653076, "epoch": 7.86, "step": 368999 }, { "epoch": 7.86, "learning_rate": 2.67665306122449e-05, "loss": 1.9119, "step": 369000 }, { "FLOPS loss": 0.08233258128166199, "L0_d": 708.77, "MLM loss": 1.9612047672271729, "epoch": 7.87, "step": 369499 }, { "epoch": 7.87, "learning_rate": 2.666448979591837e-05, "loss": 1.9116, "step": 369500 }, { "FLOPS loss": 0.05898122489452362, "L0_d": 617.86, "MLM loss": 1.8039360046386719, "epoch": 7.88, "step": 369999 }, { "epoch": 7.88, "learning_rate": 2.656265306122449e-05, "loss": 1.9108, "step": 370000 }, { "FLOPS loss": 0.06786539405584335, "L0_d": 863.75, "MLM loss": 1.921259880065918, "epoch": 7.89, "step": 370499 }, { "epoch": 7.89, "learning_rate": 2.646061224489796e-05, "loss": 1.9133, "step": 370500 }, { "FLOPS loss": 0.09662061184644699, "L0_d": 944.55, "MLM loss": 1.8544368743896484, "epoch": 7.9, "step": 370999 }, { "epoch": 7.9, "learning_rate": 2.6358571428571433e-05, "loss": 1.9118, "step": 371000 }, { "FLOPS loss": 0.07581643760204315, "L0_d": 655.27, "MLM loss": 1.8517892360687256, "epoch": 7.91, "step": 371499 }, { "epoch": 7.91, "learning_rate": 2.62565306122449e-05, "loss": 1.9103, "step": 371500 }, { "FLOPS loss": 0.062037497758865356, "L0_d": 596.05, "MLM loss": 1.9065392017364502, "epoch": 7.92, "step": 371999 }, { "epoch": 7.92, "learning_rate": 2.6154693877551022e-05, "loss": 1.9132, "step": 372000 }, { "FLOPS loss": 0.06359484791755676, "L0_d": 685.69, "MLM loss": 1.793999433517456, "epoch": 7.93, "step": 372499 }, { "epoch": 7.93, "learning_rate": 2.605265306122449e-05, "loss": 1.9114, "step": 372500 }, { "FLOPS loss": 0.06487156450748444, "L0_d": 1092.2, "MLM loss": 1.8625104427337646, "epoch": 7.94, "step": 372999 }, { "epoch": 7.94, "learning_rate": 2.595061224489796e-05, "loss": 1.9142, "step": 373000 }, { "FLOPS loss": 0.09129762649536133, "L0_d": 820.39, "MLM loss": 1.8764865398406982, "epoch": 7.95, "step": 373499 }, { "epoch": 7.95, "learning_rate": 2.5848571428571426e-05, "loss": 1.9147, "step": 373500 }, { "FLOPS loss": 0.08218321949243546, "L0_d": 650.84, "MLM loss": 1.7837188243865967, "epoch": 7.96, "step": 373999 }, { "epoch": 7.96, "learning_rate": 2.57465306122449e-05, "loss": 1.9108, "step": 374000 }, { "FLOPS loss": 0.08408604562282562, "L0_d": 712.58, "MLM loss": 1.7383946180343628, "epoch": 7.98, "step": 374499 }, { "epoch": 7.98, "learning_rate": 2.564448979591837e-05, "loss": 1.9127, "step": 374500 }, { "FLOPS loss": 0.07430750876665115, "L0_d": 710.8, "MLM loss": 1.8748829364776611, "epoch": 7.99, "step": 374999 }, { "epoch": 7.99, "learning_rate": 2.5542653061224494e-05, "loss": 1.9116, "step": 375000 }, { "FLOPS loss": 0.06837387382984161, "L0_d": 639.45, "MLM loss": 1.8347387313842773, "epoch": 8.0, "step": 375499 }, { "epoch": 8.0, "learning_rate": 2.544061224489796e-05, "loss": 1.9107, "step": 375500 }, { "FLOPS loss": 0.07094568014144897, "L0_d": 625.97, "MLM loss": 1.9303233623504639, "epoch": 8.01, "step": 375999 }, { "epoch": 8.01, "learning_rate": 2.533857142857143e-05, "loss": 1.9096, "step": 376000 }, { "FLOPS loss": 0.07427439838647842, "L0_d": 590.55, "MLM loss": 1.683091640472412, "epoch": 8.02, "step": 376499 }, { "epoch": 8.02, "learning_rate": 2.52365306122449e-05, "loss": 1.9121, "step": 376500 }, { "FLOPS loss": 0.09582684934139252, "L0_d": 720.77, "MLM loss": 1.9093230962753296, "epoch": 8.03, "step": 376999 }, { "epoch": 8.03, "learning_rate": 2.513448979591837e-05, "loss": 1.9072, "step": 377000 }, { "FLOPS loss": 0.06721869856119156, "L0_d": 743.12, "MLM loss": 1.7846167087554932, "epoch": 8.04, "step": 377499 }, { "epoch": 8.04, "learning_rate": 2.5032653061224493e-05, "loss": 1.908, "step": 377500 }, { "FLOPS loss": 0.08038800954818726, "L0_d": 802.34, "MLM loss": 1.8537399768829346, "epoch": 8.05, "step": 377999 }, { "epoch": 8.05, "learning_rate": 2.493061224489796e-05, "loss": 1.9074, "step": 378000 }, { "FLOPS loss": 0.06283194571733475, "L0_d": 806.69, "MLM loss": 1.9123787879943848, "epoch": 8.06, "step": 378499 }, { "epoch": 8.06, "learning_rate": 2.482857142857143e-05, "loss": 1.9098, "step": 378500 }, { "FLOPS loss": 0.07149063050746918, "L0_d": 930.73, "MLM loss": 2.009641408920288, "epoch": 8.07, "step": 378999 }, { "epoch": 8.07, "learning_rate": 2.47265306122449e-05, "loss": 1.9081, "step": 379000 }, { "FLOPS loss": 0.06387903541326523, "L0_d": 712.73, "MLM loss": 1.7018380165100098, "epoch": 8.08, "step": 379499 }, { "epoch": 8.08, "learning_rate": 2.462469387755102e-05, "loss": 1.9062, "step": 379500 }, { "FLOPS loss": 0.07200448960065842, "L0_d": 905.91, "MLM loss": 1.6971787214279175, "epoch": 8.09, "step": 379999 }, { "epoch": 8.09, "learning_rate": 2.452265306122449e-05, "loss": 1.9014, "step": 380000 }, { "FLOPS loss": 0.09073708951473236, "L0_d": 840.09, "MLM loss": 1.6935443878173828, "epoch": 8.1, "step": 380499 }, { "epoch": 8.1, "learning_rate": 2.442061224489796e-05, "loss": 1.9123, "step": 380500 }, { "FLOPS loss": 0.06841748207807541, "L0_d": 746.36, "MLM loss": 1.880540132522583, "epoch": 8.11, "step": 380999 }, { "epoch": 8.11, "learning_rate": 2.431857142857143e-05, "loss": 1.8999, "step": 381000 }, { "FLOPS loss": 0.055594101548194885, "L0_d": 584.33, "MLM loss": 1.7323696613311768, "epoch": 8.12, "step": 381499 }, { "epoch": 8.12, "learning_rate": 2.4216734693877553e-05, "loss": 1.9055, "step": 381500 }, { "FLOPS loss": 0.06862866133451462, "L0_d": 639.55, "MLM loss": 1.7428958415985107, "epoch": 8.13, "step": 381999 }, { "epoch": 8.13, "learning_rate": 2.411469387755102e-05, "loss": 1.9064, "step": 382000 }, { "FLOPS loss": 0.07999974489212036, "L0_d": 834.3, "MLM loss": 1.8679862022399902, "epoch": 8.15, "step": 382499 }, { "epoch": 8.15, "learning_rate": 2.401265306122449e-05, "loss": 1.9049, "step": 382500 }, { "FLOPS loss": 0.06594076007604599, "L0_d": 671.27, "MLM loss": 1.7892141342163086, "epoch": 8.16, "step": 382999 }, { "epoch": 8.16, "learning_rate": 2.3910612244897958e-05, "loss": 1.9086, "step": 383000 }, { "FLOPS loss": 0.08169030398130417, "L0_d": 679.23, "MLM loss": 1.7941107749938965, "epoch": 8.17, "step": 383499 }, { "epoch": 8.17, "learning_rate": 2.380877551020408e-05, "loss": 1.9062, "step": 383500 }, { "FLOPS loss": 0.08979839831590652, "L0_d": 802.62, "MLM loss": 1.776297688484192, "epoch": 8.18, "step": 383999 }, { "epoch": 8.18, "learning_rate": 2.3706734693877553e-05, "loss": 1.9051, "step": 384000 }, { "FLOPS loss": 0.08627878129482269, "L0_d": 852.94, "MLM loss": 2.0511772632598877, "epoch": 8.19, "step": 384499 }, { "epoch": 8.19, "learning_rate": 2.360469387755102e-05, "loss": 1.9053, "step": 384500 }, { "FLOPS loss": 0.0771448090672493, "L0_d": 644.89, "MLM loss": 1.7911652326583862, "epoch": 8.2, "step": 384999 }, { "epoch": 8.2, "learning_rate": 2.3502653061224493e-05, "loss": 1.9049, "step": 385000 }, { "FLOPS loss": 0.09200733155012131, "L0_d": 624.42, "MLM loss": 1.8275014162063599, "epoch": 8.21, "step": 385499 }, { "epoch": 8.21, "learning_rate": 2.340061224489796e-05, "loss": 1.9085, "step": 385500 }, { "FLOPS loss": 0.0631524920463562, "L0_d": 915.19, "MLM loss": 1.7387886047363281, "epoch": 8.22, "step": 385999 }, { "epoch": 8.22, "learning_rate": 2.329877551020408e-05, "loss": 1.9045, "step": 386000 }, { "FLOPS loss": 0.07859036326408386, "L0_d": 814.27, "MLM loss": 1.9064040184020996, "epoch": 8.23, "step": 386499 }, { "epoch": 8.23, "learning_rate": 2.3196734693877553e-05, "loss": 1.9028, "step": 386500 }, { "FLOPS loss": 0.08318474888801575, "L0_d": 662.44, "MLM loss": 1.8862558603286743, "epoch": 8.24, "step": 386999 }, { "epoch": 8.24, "learning_rate": 2.309469387755102e-05, "loss": 1.9052, "step": 387000 }, { "FLOPS loss": 0.07222796231508255, "L0_d": 875.73, "MLM loss": 1.7195855379104614, "epoch": 8.25, "step": 387499 }, { "epoch": 8.25, "learning_rate": 2.2992653061224493e-05, "loss": 1.9057, "step": 387500 }, { "FLOPS loss": 0.06394381076097488, "L0_d": 656.22, "MLM loss": 1.7930631637573242, "epoch": 8.26, "step": 387999 }, { "epoch": 8.26, "learning_rate": 2.2890816326530613e-05, "loss": 1.9036, "step": 388000 }, { "FLOPS loss": 0.08451195806264877, "L0_d": 1135.05, "MLM loss": 1.8217693567276, "epoch": 8.27, "step": 388499 }, { "epoch": 8.27, "learning_rate": 2.278877551020408e-05, "loss": 1.9043, "step": 388500 }, { "FLOPS loss": 0.0837959423661232, "L0_d": 694.78, "MLM loss": 1.817704677581787, "epoch": 8.28, "step": 388999 }, { "epoch": 8.28, "learning_rate": 2.268673469387755e-05, "loss": 1.9049, "step": 389000 }, { "FLOPS loss": 0.08096183836460114, "L0_d": 749.94, "MLM loss": 1.900554895401001, "epoch": 8.29, "step": 389499 }, { "epoch": 8.29, "learning_rate": 2.258469387755102e-05, "loss": 1.9013, "step": 389500 }, { "FLOPS loss": 0.07682886719703674, "L0_d": 733.36, "MLM loss": 1.9024741649627686, "epoch": 8.31, "step": 389999 }, { "epoch": 8.31, "learning_rate": 2.2482857142857145e-05, "loss": 1.9057, "step": 390000 }, { "FLOPS loss": 0.07478464394807816, "L0_d": 660.47, "MLM loss": 1.8336386680603027, "epoch": 8.32, "step": 390499 }, { "epoch": 8.32, "learning_rate": 2.2380816326530613e-05, "loss": 1.9003, "step": 390500 }, { "FLOPS loss": 0.06300283223390579, "L0_d": 685.8, "MLM loss": 1.796729564666748, "epoch": 8.33, "step": 390999 }, { "epoch": 8.33, "learning_rate": 2.2278775510204084e-05, "loss": 1.9027, "step": 391000 }, { "FLOPS loss": 0.07821809500455856, "L0_d": 776.97, "MLM loss": 1.9271748065948486, "epoch": 8.34, "step": 391499 }, { "epoch": 8.34, "learning_rate": 2.2176734693877553e-05, "loss": 1.9022, "step": 391500 }, { "FLOPS loss": 0.07604720443487167, "L0_d": 684.19, "MLM loss": 1.953967809677124, "epoch": 8.35, "step": 391999 }, { "epoch": 8.35, "learning_rate": 2.2074693877551024e-05, "loss": 1.9029, "step": 392000 }, { "FLOPS loss": 0.0781513974070549, "L0_d": 887.06, "MLM loss": 1.827120304107666, "epoch": 8.36, "step": 392499 }, { "epoch": 8.36, "learning_rate": 2.1972857142857145e-05, "loss": 1.9017, "step": 392500 }, { "FLOPS loss": 0.06326673179864883, "L0_d": 775.97, "MLM loss": 1.8110060691833496, "epoch": 8.37, "step": 392999 }, { "epoch": 8.37, "learning_rate": 2.1870816326530613e-05, "loss": 1.9043, "step": 393000 }, { "FLOPS loss": 0.08700016140937805, "L0_d": 814.06, "MLM loss": 1.7394301891326904, "epoch": 8.38, "step": 393499 }, { "epoch": 8.38, "learning_rate": 2.176877551020408e-05, "loss": 1.8996, "step": 393500 }, { "FLOPS loss": 0.07348722219467163, "L0_d": 698.36, "MLM loss": 1.963762640953064, "epoch": 8.39, "step": 393999 }, { "epoch": 8.39, "learning_rate": 2.1666734693877552e-05, "loss": 1.9034, "step": 394000 }, { "FLOPS loss": 0.08353521674871445, "L0_d": 892.64, "MLM loss": 1.8784441947937012, "epoch": 8.4, "step": 394499 }, { "epoch": 8.4, "learning_rate": 2.156469387755102e-05, "loss": 1.8994, "step": 394500 }, { "FLOPS loss": 0.06172090768814087, "L0_d": 537.95, "MLM loss": 1.8824307918548584, "epoch": 8.41, "step": 394999 }, { "epoch": 8.41, "learning_rate": 2.1462857142857145e-05, "loss": 1.9054, "step": 395000 }, { "FLOPS loss": 0.06748291105031967, "L0_d": 672.89, "MLM loss": 1.723147988319397, "epoch": 8.42, "step": 395499 }, { "epoch": 8.42, "learning_rate": 2.1360816326530613e-05, "loss": 1.904, "step": 395500 }, { "FLOPS loss": 0.07689233869314194, "L0_d": 822.42, "MLM loss": 1.7123825550079346, "epoch": 8.43, "step": 395999 }, { "epoch": 8.43, "learning_rate": 2.125877551020408e-05, "loss": 1.9018, "step": 396000 }, { "FLOPS loss": 0.0659564808011055, "L0_d": 635.97, "MLM loss": 1.9036650657653809, "epoch": 8.44, "step": 396499 }, { "epoch": 8.44, "learning_rate": 2.1156734693877552e-05, "loss": 1.8999, "step": 396500 }, { "FLOPS loss": 0.07588682323694229, "L0_d": 804.62, "MLM loss": 1.774322271347046, "epoch": 8.45, "step": 396999 }, { "epoch": 8.45, "learning_rate": 2.105469387755102e-05, "loss": 1.9002, "step": 397000 }, { "FLOPS loss": 0.05796373263001442, "L0_d": 603.67, "MLM loss": 1.7872540950775146, "epoch": 8.46, "step": 397499 }, { "epoch": 8.47, "learning_rate": 2.0952857142857144e-05, "loss": 1.899, "step": 397500 }, { "FLOPS loss": 0.06982676684856415, "L0_d": 703.95, "MLM loss": 1.8720976114273071, "epoch": 8.48, "step": 397999 }, { "epoch": 8.48, "learning_rate": 2.0850816326530616e-05, "loss": 1.902, "step": 398000 }, { "FLOPS loss": 0.06411126255989075, "L0_d": 640.84, "MLM loss": 1.9107850790023804, "epoch": 8.49, "step": 398499 }, { "epoch": 8.49, "learning_rate": 2.0748775510204084e-05, "loss": 1.8978, "step": 398500 }, { "FLOPS loss": 0.0808933675289154, "L0_d": 718.69, "MLM loss": 1.850345492362976, "epoch": 8.5, "step": 398999 }, { "epoch": 8.5, "learning_rate": 2.0646734693877552e-05, "loss": 1.8977, "step": 399000 }, { "FLOPS loss": 0.0982719212770462, "L0_d": 1116.62, "MLM loss": 1.8427366018295288, "epoch": 8.51, "step": 399499 }, { "epoch": 8.51, "learning_rate": 2.0544897959183673e-05, "loss": 1.9048, "step": 399500 }, { "FLOPS loss": 0.08528366684913635, "L0_d": 807.28, "MLM loss": 1.8523929119110107, "epoch": 8.52, "step": 399999 }, { "epoch": 8.52, "learning_rate": 2.0442857142857144e-05, "loss": 1.8993, "step": 400000 }, { "FLOPS loss": 0.07065005600452423, "L0_d": 602.44, "MLM loss": 1.8898108005523682, "epoch": 8.53, "step": 400499 }, { "epoch": 8.53, "learning_rate": 2.0340816326530612e-05, "loss": 1.8997, "step": 400500 }, { "FLOPS loss": 0.07828733325004578, "L0_d": 999.39, "MLM loss": 1.8211073875427246, "epoch": 8.54, "step": 400999 }, { "epoch": 8.54, "learning_rate": 2.0238775510204084e-05, "loss": 1.9008, "step": 401000 }, { "FLOPS loss": 0.07446083426475525, "L0_d": 747.81, "MLM loss": 1.7699792385101318, "epoch": 8.55, "step": 401499 }, { "epoch": 8.55, "learning_rate": 2.0136938775510204e-05, "loss": 1.8991, "step": 401500 }, { "FLOPS loss": 0.06935106962919235, "L0_d": 611.12, "MLM loss": 1.894187569618225, "epoch": 8.56, "step": 401999 }, { "epoch": 8.56, "learning_rate": 2.0034897959183672e-05, "loss": 1.901, "step": 402000 }, { "FLOPS loss": 0.0887903943657875, "L0_d": 788.98, "MLM loss": 1.712061882019043, "epoch": 8.57, "step": 402499 }, { "epoch": 8.57, "learning_rate": 1.9932857142857144e-05, "loss": 1.8946, "step": 402500 }, { "FLOPS loss": 0.0613437294960022, "L0_d": 597.14, "MLM loss": 1.7486085891723633, "epoch": 8.58, "step": 402999 }, { "epoch": 8.58, "learning_rate": 1.9830816326530612e-05, "loss": 1.901, "step": 403000 }, { "FLOPS loss": 0.07355440407991409, "L0_d": 805.77, "MLM loss": 1.9495327472686768, "epoch": 8.59, "step": 403499 }, { "epoch": 8.59, "learning_rate": 1.9728979591836736e-05, "loss": 1.8964, "step": 403500 }, { "FLOPS loss": 0.07202013581991196, "L0_d": 770.02, "MLM loss": 2.0217814445495605, "epoch": 8.6, "step": 403999 }, { "epoch": 8.6, "learning_rate": 1.9626938775510208e-05, "loss": 1.8962, "step": 404000 }, { "FLOPS loss": 0.07498647272586823, "L0_d": 800.06, "MLM loss": 1.692859411239624, "epoch": 8.61, "step": 404499 }, { "epoch": 8.61, "learning_rate": 1.9524897959183676e-05, "loss": 1.8966, "step": 404500 }, { "FLOPS loss": 0.06827228516340256, "L0_d": 777.12, "MLM loss": 1.7969110012054443, "epoch": 8.62, "step": 404999 }, { "epoch": 8.62, "learning_rate": 1.9422857142857144e-05, "loss": 1.8969, "step": 405000 }, { "FLOPS loss": 0.07816651463508606, "L0_d": 755.45, "MLM loss": 1.8361034393310547, "epoch": 8.64, "step": 405499 }, { "epoch": 8.64, "learning_rate": 1.9321020408163264e-05, "loss": 1.9007, "step": 405500 }, { "FLOPS loss": 0.07840103656053543, "L0_d": 852.75, "MLM loss": 1.8891184329986572, "epoch": 8.65, "step": 405999 }, { "epoch": 8.65, "learning_rate": 1.9218979591836736e-05, "loss": 1.8984, "step": 406000 }, { "FLOPS loss": 0.06752259284257889, "L0_d": 679.36, "MLM loss": 1.6485272645950317, "epoch": 8.66, "step": 406499 }, { "epoch": 8.66, "learning_rate": 1.9116938775510204e-05, "loss": 1.8959, "step": 406500 }, { "FLOPS loss": 0.07557836920022964, "L0_d": 606.3, "MLM loss": 1.8115220069885254, "epoch": 8.67, "step": 406999 }, { "epoch": 8.67, "learning_rate": 1.9014897959183675e-05, "loss": 1.8979, "step": 407000 }, { "FLOPS loss": 0.07810939103364944, "L0_d": 803.3, "MLM loss": 1.8370587825775146, "epoch": 8.68, "step": 407499 }, { "epoch": 8.68, "learning_rate": 1.8913061224489796e-05, "loss": 1.8973, "step": 407500 }, { "FLOPS loss": 0.07769665867090225, "L0_d": 854.0, "MLM loss": 1.7443437576293945, "epoch": 8.69, "step": 407999 }, { "epoch": 8.69, "learning_rate": 1.8811020408163264e-05, "loss": 1.9003, "step": 408000 }, { "FLOPS loss": 0.07340855151414871, "L0_d": 762.5, "MLM loss": 1.8406531810760498, "epoch": 8.7, "step": 408499 }, { "epoch": 8.7, "learning_rate": 1.8708979591836736e-05, "loss": 1.8983, "step": 408500 }, { "FLOPS loss": 0.07528545707464218, "L0_d": 656.95, "MLM loss": 1.853143572807312, "epoch": 8.71, "step": 408999 }, { "epoch": 8.71, "learning_rate": 1.8606938775510204e-05, "loss": 1.8998, "step": 409000 }, { "FLOPS loss": 0.09381501376628876, "L0_d": 876.23, "MLM loss": 1.7811716794967651, "epoch": 8.72, "step": 409499 }, { "epoch": 8.72, "learning_rate": 1.8504897959183672e-05, "loss": 1.8971, "step": 409500 }, { "FLOPS loss": 0.09534640610218048, "L0_d": 803.41, "MLM loss": 1.7951806783676147, "epoch": 8.73, "step": 409999 }, { "epoch": 8.73, "learning_rate": 1.8403061224489796e-05, "loss": 1.8951, "step": 410000 }, { "FLOPS loss": 0.06121053919196129, "L0_d": 595.77, "MLM loss": 1.647636890411377, "epoch": 8.74, "step": 410499 }, { "epoch": 8.74, "learning_rate": 1.8301020408163267e-05, "loss": 1.8974, "step": 410500 }, { "FLOPS loss": 0.06547104567289352, "L0_d": 565.81, "MLM loss": 1.7979687452316284, "epoch": 8.75, "step": 410999 }, { "epoch": 8.75, "learning_rate": 1.8198979591836735e-05, "loss": 1.8961, "step": 411000 }, { "FLOPS loss": 0.09215452522039413, "L0_d": 841.16, "MLM loss": 1.897557258605957, "epoch": 8.76, "step": 411499 }, { "epoch": 8.76, "learning_rate": 1.8096938775510207e-05, "loss": 1.8968, "step": 411500 }, { "FLOPS loss": 0.08165416121482849, "L0_d": 823.58, "MLM loss": 1.8826181888580322, "epoch": 8.77, "step": 411999 }, { "epoch": 8.77, "learning_rate": 1.7994897959183675e-05, "loss": 1.8971, "step": 412000 }, { "FLOPS loss": 0.07333676517009735, "L0_d": 659.52, "MLM loss": 1.7057745456695557, "epoch": 8.78, "step": 412499 }, { "epoch": 8.78, "learning_rate": 1.7893061224489796e-05, "loss": 1.8954, "step": 412500 }, { "FLOPS loss": 0.06546351313591003, "L0_d": 597.67, "MLM loss": 1.8420034646987915, "epoch": 8.8, "step": 412999 }, { "epoch": 8.8, "learning_rate": 1.7791020408163267e-05, "loss": 1.8968, "step": 413000 }, { "FLOPS loss": 0.06565094739198685, "L0_d": 615.06, "MLM loss": 1.8575172424316406, "epoch": 8.81, "step": 413499 }, { "epoch": 8.81, "learning_rate": 1.7688979591836735e-05, "loss": 1.8928, "step": 413500 }, { "FLOPS loss": 0.06962642818689346, "L0_d": 819.78, "MLM loss": 1.8613834381103516, "epoch": 8.82, "step": 413999 }, { "epoch": 8.82, "learning_rate": 1.7586938775510207e-05, "loss": 1.8949, "step": 414000 }, { "FLOPS loss": 0.08159743249416351, "L0_d": 831.28, "MLM loss": 1.8682200908660889, "epoch": 8.83, "step": 414499 }, { "epoch": 8.83, "learning_rate": 1.7485102040816327e-05, "loss": 1.8959, "step": 414500 }, { "FLOPS loss": 0.08759882301092148, "L0_d": 845.52, "MLM loss": 1.7896513938903809, "epoch": 8.84, "step": 414999 }, { "epoch": 8.84, "learning_rate": 1.7383061224489796e-05, "loss": 1.8955, "step": 415000 }, { "FLOPS loss": 0.0938563421368599, "L0_d": 1010.78, "MLM loss": 1.7237412929534912, "epoch": 8.85, "step": 415499 }, { "epoch": 8.85, "learning_rate": 1.7281020408163264e-05, "loss": 1.8961, "step": 415500 }, { "FLOPS loss": 0.07220235466957092, "L0_d": 654.64, "MLM loss": 1.8532274961471558, "epoch": 8.86, "step": 415999 }, { "epoch": 8.86, "learning_rate": 1.7178979591836735e-05, "loss": 1.895, "step": 416000 }, { "FLOPS loss": 0.06810647994279861, "L0_d": 613.75, "MLM loss": 2.043182849884033, "epoch": 8.87, "step": 416499 }, { "epoch": 8.87, "learning_rate": 1.707714285714286e-05, "loss": 1.8943, "step": 416500 }, { "FLOPS loss": 0.07071580737829208, "L0_d": 661.27, "MLM loss": 1.9305920600891113, "epoch": 8.88, "step": 416999 }, { "epoch": 8.88, "learning_rate": 1.6975102040816327e-05, "loss": 1.8946, "step": 417000 }, { "FLOPS loss": 0.06806696206331253, "L0_d": 770.3, "MLM loss": 1.8732974529266357, "epoch": 8.89, "step": 417499 }, { "epoch": 8.89, "learning_rate": 1.68730612244898e-05, "loss": 1.8938, "step": 417500 }, { "FLOPS loss": 0.0684627816081047, "L0_d": 920.95, "MLM loss": 1.9407403469085693, "epoch": 8.9, "step": 417999 }, { "epoch": 8.9, "learning_rate": 1.6771020408163267e-05, "loss": 1.8937, "step": 418000 }, { "FLOPS loss": 0.06968861073255539, "L0_d": 698.0, "MLM loss": 1.9223194122314453, "epoch": 8.91, "step": 418499 }, { "epoch": 8.91, "learning_rate": 1.6668979591836735e-05, "loss": 1.8959, "step": 418500 }, { "FLOPS loss": 0.07910454273223877, "L0_d": 1309.33, "MLM loss": 1.7841010093688965, "epoch": 8.92, "step": 418999 }, { "epoch": 8.92, "learning_rate": 1.656714285714286e-05, "loss": 1.8974, "step": 419000 }, { "FLOPS loss": 0.06197473779320717, "L0_d": 706.7, "MLM loss": 1.8492615222930908, "epoch": 8.93, "step": 419499 }, { "epoch": 8.93, "learning_rate": 1.6465102040816327e-05, "loss": 1.8932, "step": 419500 }, { "FLOPS loss": 0.058207958936691284, "L0_d": 604.22, "MLM loss": 1.8547852039337158, "epoch": 8.94, "step": 419999 }, { "epoch": 8.94, "learning_rate": 1.63630612244898e-05, "loss": 1.8907, "step": 420000 }, { "FLOPS loss": 0.09094519168138504, "L0_d": 902.23, "MLM loss": 1.9144949913024902, "epoch": 8.95, "step": 420499 }, { "epoch": 8.95, "learning_rate": 1.6261020408163267e-05, "loss": 1.8927, "step": 420500 }, { "FLOPS loss": 0.06928250938653946, "L0_d": 714.59, "MLM loss": 1.7490980625152588, "epoch": 8.97, "step": 420999 }, { "epoch": 8.97, "learning_rate": 1.6159183673469387e-05, "loss": 1.8895, "step": 421000 }, { "FLOPS loss": 0.0713324099779129, "L0_d": 694.98, "MLM loss": 1.7904343605041504, "epoch": 8.98, "step": 421499 }, { "epoch": 8.98, "learning_rate": 1.6057142857142855e-05, "loss": 1.8895, "step": 421500 }, { "FLOPS loss": 0.07053466886281967, "L0_d": 647.95, "MLM loss": 1.8427083492279053, "epoch": 8.99, "step": 421999 }, { "epoch": 8.99, "learning_rate": 1.5955102040816327e-05, "loss": 1.8941, "step": 422000 }, { "FLOPS loss": 0.08139703422784805, "L0_d": 1328.97, "MLM loss": 1.7719063758850098, "epoch": 9.0, "step": 422499 }, { "epoch": 9.0, "learning_rate": 1.5853061224489795e-05, "loss": 1.8931, "step": 422500 }, { "FLOPS loss": 0.06293307989835739, "L0_d": 656.23, "MLM loss": 1.8263726234436035, "epoch": 9.01, "step": 422999 }, { "epoch": 9.01, "learning_rate": 1.575122448979592e-05, "loss": 1.89, "step": 423000 }, { "FLOPS loss": 0.08060479164123535, "L0_d": 735.62, "MLM loss": 1.697467565536499, "epoch": 9.02, "step": 423499 }, { "epoch": 9.02, "learning_rate": 1.564918367346939e-05, "loss": 1.89, "step": 423500 }, { "FLOPS loss": 0.08011506497859955, "L0_d": 729.72, "MLM loss": 1.7792555093765259, "epoch": 9.03, "step": 423999 }, { "epoch": 9.03, "learning_rate": 1.554714285714286e-05, "loss": 1.8894, "step": 424000 }, { "FLOPS loss": 0.07208956032991409, "L0_d": 629.22, "MLM loss": 1.8163645267486572, "epoch": 9.04, "step": 424499 }, { "epoch": 9.04, "learning_rate": 1.5445102040816327e-05, "loss": 1.8884, "step": 424500 }, { "FLOPS loss": 0.06710517406463623, "L0_d": 770.2, "MLM loss": 1.7197039127349854, "epoch": 9.05, "step": 424999 }, { "epoch": 9.05, "learning_rate": 1.534326530612245e-05, "loss": 1.89, "step": 425000 }, { "FLOPS loss": 0.09404978156089783, "L0_d": 791.61, "MLM loss": 1.7534675598144531, "epoch": 9.06, "step": 425499 }, { "epoch": 9.06, "learning_rate": 1.5241224489795919e-05, "loss": 1.8884, "step": 425500 }, { "FLOPS loss": 0.07811388373374939, "L0_d": 679.25, "MLM loss": 1.8744385242462158, "epoch": 9.07, "step": 425999 }, { "epoch": 9.07, "learning_rate": 1.513918367346939e-05, "loss": 1.8896, "step": 426000 }, { "FLOPS loss": 0.08018653094768524, "L0_d": 643.23, "MLM loss": 1.8160462379455566, "epoch": 9.08, "step": 426499 }, { "epoch": 9.08, "learning_rate": 1.5037142857142858e-05, "loss": 1.8896, "step": 426500 }, { "FLOPS loss": 0.07460828870534897, "L0_d": 683.14, "MLM loss": 1.7526308298110962, "epoch": 9.09, "step": 426999 }, { "epoch": 9.09, "learning_rate": 1.4935102040816326e-05, "loss": 1.8882, "step": 427000 }, { "FLOPS loss": 0.06290356814861298, "L0_d": 624.78, "MLM loss": 1.7872731685638428, "epoch": 9.1, "step": 427499 }, { "epoch": 9.1, "learning_rate": 1.4833061224489796e-05, "loss": 1.8877, "step": 427500 }, { "FLOPS loss": 0.0771726593375206, "L0_d": 798.14, "MLM loss": 1.810828447341919, "epoch": 9.11, "step": 427999 }, { "epoch": 9.11, "learning_rate": 1.473122448979592e-05, "loss": 1.883, "step": 428000 }, { "FLOPS loss": 0.07212334126234055, "L0_d": 630.3, "MLM loss": 1.8126250505447388, "epoch": 9.13, "step": 428499 }, { "epoch": 9.13, "learning_rate": 1.4629183673469388e-05, "loss": 1.8931, "step": 428500 }, { "FLOPS loss": 0.06718722730875015, "L0_d": 693.72, "MLM loss": 1.8408608436584473, "epoch": 9.14, "step": 428999 }, { "epoch": 9.14, "learning_rate": 1.4527142857142858e-05, "loss": 1.8907, "step": 429000 }, { "FLOPS loss": 0.07557131350040436, "L0_d": 788.81, "MLM loss": 1.761578917503357, "epoch": 9.15, "step": 429499 }, { "epoch": 9.15, "learning_rate": 1.4425102040816326e-05, "loss": 1.8882, "step": 429500 }, { "FLOPS loss": 0.07352057844400406, "L0_d": 762.31, "MLM loss": 1.7296146154403687, "epoch": 9.16, "step": 429999 }, { "epoch": 9.16, "learning_rate": 1.4323265306122449e-05, "loss": 1.8895, "step": 430000 }, { "FLOPS loss": 0.07733792811632156, "L0_d": 806.64, "MLM loss": 1.7991564273834229, "epoch": 9.17, "step": 430499 }, { "epoch": 9.17, "learning_rate": 1.422122448979592e-05, "loss": 1.8896, "step": 430500 }, { "FLOPS loss": 0.08784882724285126, "L0_d": 798.2, "MLM loss": 1.869628667831421, "epoch": 9.18, "step": 430999 }, { "epoch": 9.18, "learning_rate": 1.4119183673469388e-05, "loss": 1.8862, "step": 431000 }, { "FLOPS loss": 0.06718917936086655, "L0_d": 759.0, "MLM loss": 1.8948330879211426, "epoch": 9.19, "step": 431499 }, { "epoch": 9.19, "learning_rate": 1.4017142857142856e-05, "loss": 1.8914, "step": 431500 }, { "FLOPS loss": 0.07390324771404266, "L0_d": 652.94, "MLM loss": 1.825411081314087, "epoch": 9.2, "step": 431999 }, { "epoch": 9.2, "learning_rate": 1.3915102040816328e-05, "loss": 1.8882, "step": 432000 }, { "FLOPS loss": 0.0791919007897377, "L0_d": 709.88, "MLM loss": 1.9184443950653076, "epoch": 9.21, "step": 432499 }, { "epoch": 9.21, "learning_rate": 1.381326530612245e-05, "loss": 1.8842, "step": 432500 }, { "FLOPS loss": 0.061726462095975876, "L0_d": 551.17, "MLM loss": 1.8523519039154053, "epoch": 9.22, "step": 432999 }, { "epoch": 9.22, "learning_rate": 1.3711224489795918e-05, "loss": 1.889, "step": 433000 }, { "FLOPS loss": 0.07693947851657867, "L0_d": 629.14, "MLM loss": 1.7981572151184082, "epoch": 9.23, "step": 433499 }, { "epoch": 9.23, "learning_rate": 1.360918367346939e-05, "loss": 1.886, "step": 433500 }, { "FLOPS loss": 0.07043065875768661, "L0_d": 669.44, "MLM loss": 1.8404135704040527, "epoch": 9.24, "step": 433999 }, { "epoch": 9.24, "learning_rate": 1.3507142857142858e-05, "loss": 1.8868, "step": 434000 }, { "FLOPS loss": 0.06310301274061203, "L0_d": 744.97, "MLM loss": 1.7336649894714355, "epoch": 9.25, "step": 434499 }, { "epoch": 9.25, "learning_rate": 1.3405102040816328e-05, "loss": 1.8908, "step": 434500 }, { "FLOPS loss": 0.06630953401327133, "L0_d": 758.22, "MLM loss": 1.7070391178131104, "epoch": 9.26, "step": 434999 }, { "epoch": 9.26, "learning_rate": 1.330326530612245e-05, "loss": 1.8862, "step": 435000 }, { "FLOPS loss": 0.07912331819534302, "L0_d": 704.33, "MLM loss": 1.72645103931427, "epoch": 9.27, "step": 435499 }, { "epoch": 9.27, "learning_rate": 1.3201224489795918e-05, "loss": 1.8879, "step": 435500 }, { "FLOPS loss": 0.061746422201395035, "L0_d": 685.48, "MLM loss": 1.7991679906845093, "epoch": 9.28, "step": 435999 }, { "epoch": 9.28, "learning_rate": 1.309918367346939e-05, "loss": 1.8863, "step": 436000 }, { "FLOPS loss": 0.06622132658958435, "L0_d": 738.52, "MLM loss": 1.7106609344482422, "epoch": 9.3, "step": 436499 }, { "epoch": 9.3, "learning_rate": 1.2997142857142858e-05, "loss": 1.8833, "step": 436500 }, { "FLOPS loss": 0.08270273357629776, "L0_d": 1133.8, "MLM loss": 1.7119030952453613, "epoch": 9.31, "step": 436999 }, { "epoch": 9.31, "learning_rate": 1.289530612244898e-05, "loss": 1.8889, "step": 437000 }, { "FLOPS loss": 0.08029799908399582, "L0_d": 887.58, "MLM loss": 1.7202215194702148, "epoch": 9.32, "step": 437499 }, { "epoch": 9.32, "learning_rate": 1.2793265306122448e-05, "loss": 1.8836, "step": 437500 }, { "FLOPS loss": 0.0647151842713356, "L0_d": 644.31, "MLM loss": 1.7943322658538818, "epoch": 9.33, "step": 437999 }, { "epoch": 9.33, "learning_rate": 1.269122448979592e-05, "loss": 1.8865, "step": 438000 }, { "FLOPS loss": 0.09509658813476562, "L0_d": 1019.94, "MLM loss": 1.8161685466766357, "epoch": 9.34, "step": 438499 }, { "epoch": 9.34, "learning_rate": 1.2589183673469388e-05, "loss": 1.8856, "step": 438500 }, { "FLOPS loss": 0.08688385784626007, "L0_d": 775.3, "MLM loss": 1.843897819519043, "epoch": 9.35, "step": 438999 }, { "epoch": 9.35, "learning_rate": 1.2487346938775512e-05, "loss": 1.887, "step": 439000 }, { "FLOPS loss": 0.07664480060338974, "L0_d": 716.61, "MLM loss": 1.8365232944488525, "epoch": 9.36, "step": 439499 }, { "epoch": 9.36, "learning_rate": 1.2385306122448981e-05, "loss": 1.8852, "step": 439500 }, { "FLOPS loss": 0.07584109157323837, "L0_d": 1017.5, "MLM loss": 1.8281240463256836, "epoch": 9.37, "step": 439999 }, { "epoch": 9.37, "learning_rate": 1.228326530612245e-05, "loss": 1.8839, "step": 440000 }, { "FLOPS loss": 0.09187859296798706, "L0_d": 768.38, "MLM loss": 1.9023255109786987, "epoch": 9.38, "step": 440499 }, { "epoch": 9.38, "learning_rate": 1.218122448979592e-05, "loss": 1.8842, "step": 440500 }, { "FLOPS loss": 0.07121729850769043, "L0_d": 589.25, "MLM loss": 1.7611678838729858, "epoch": 9.39, "step": 440999 }, { "epoch": 9.39, "learning_rate": 1.2079387755102042e-05, "loss": 1.8823, "step": 441000 }, { "FLOPS loss": 0.07112989574670792, "L0_d": 655.28, "MLM loss": 1.781064748764038, "epoch": 9.4, "step": 441499 }, { "epoch": 9.4, "learning_rate": 1.197734693877551e-05, "loss": 1.8905, "step": 441500 }, { "FLOPS loss": 0.06727603822946548, "L0_d": 627.89, "MLM loss": 1.7931089401245117, "epoch": 9.41, "step": 441999 }, { "epoch": 9.41, "learning_rate": 1.187530612244898e-05, "loss": 1.8821, "step": 442000 }, { "FLOPS loss": 0.08041385561227798, "L0_d": 826.0, "MLM loss": 1.8660180568695068, "epoch": 9.42, "step": 442499 }, { "epoch": 9.42, "learning_rate": 1.177326530612245e-05, "loss": 1.8863, "step": 442500 }, { "FLOPS loss": 0.06488444656133652, "L0_d": 650.3, "MLM loss": 1.5891389846801758, "epoch": 9.43, "step": 442999 }, { "epoch": 9.43, "learning_rate": 1.1671224489795919e-05, "loss": 1.8857, "step": 443000 }, { "FLOPS loss": 0.07908573746681213, "L0_d": 814.28, "MLM loss": 1.8341909646987915, "epoch": 9.44, "step": 443499 }, { "epoch": 9.44, "learning_rate": 1.1569387755102042e-05, "loss": 1.887, "step": 443500 }, { "FLOPS loss": 0.0627230852842331, "L0_d": 725.02, "MLM loss": 1.9674441814422607, "epoch": 9.46, "step": 443999 }, { "epoch": 9.46, "learning_rate": 1.1467346938775511e-05, "loss": 1.8837, "step": 444000 }, { "FLOPS loss": 0.07779854536056519, "L0_d": 792.72, "MLM loss": 1.77005934715271, "epoch": 9.47, "step": 444499 }, { "epoch": 9.47, "learning_rate": 1.1365306122448981e-05, "loss": 1.8831, "step": 444500 }, { "FLOPS loss": 0.06868050247430801, "L0_d": 651.2, "MLM loss": 1.7083113193511963, "epoch": 9.48, "step": 444999 }, { "epoch": 9.48, "learning_rate": 1.1263265306122449e-05, "loss": 1.884, "step": 445000 }, { "FLOPS loss": 0.07770167291164398, "L0_d": 732.28, "MLM loss": 1.8704335689544678, "epoch": 9.49, "step": 445499 }, { "epoch": 9.49, "learning_rate": 1.1161224489795919e-05, "loss": 1.8882, "step": 445500 }, { "FLOPS loss": 0.07393424212932587, "L0_d": 602.7, "MLM loss": 1.8852603435516357, "epoch": 9.5, "step": 445999 }, { "epoch": 9.5, "learning_rate": 1.1059387755102041e-05, "loss": 1.8845, "step": 446000 }, { "FLOPS loss": 0.061653368175029755, "L0_d": 612.22, "MLM loss": 1.9074640274047852, "epoch": 9.51, "step": 446499 }, { "epoch": 9.51, "learning_rate": 1.0957346938775511e-05, "loss": 1.8839, "step": 446500 }, { "FLOPS loss": 0.08355122804641724, "L0_d": 753.42, "MLM loss": 1.7888277769088745, "epoch": 9.52, "step": 446999 }, { "epoch": 9.52, "learning_rate": 1.085530612244898e-05, "loss": 1.8827, "step": 447000 }, { "FLOPS loss": 0.07092072814702988, "L0_d": 697.31, "MLM loss": 1.7585101127624512, "epoch": 9.53, "step": 447499 }, { "epoch": 9.53, "learning_rate": 1.0753265306122449e-05, "loss": 1.8809, "step": 447500 }, { "FLOPS loss": 0.07540085166692734, "L0_d": 741.02, "MLM loss": 1.7078624963760376, "epoch": 9.54, "step": 447999 }, { "epoch": 9.54, "learning_rate": 1.0651428571428571e-05, "loss": 1.8872, "step": 448000 }, { "FLOPS loss": 0.0650225356221199, "L0_d": 752.22, "MLM loss": 1.9339125156402588, "epoch": 9.55, "step": 448499 }, { "epoch": 9.55, "learning_rate": 1.0549387755102041e-05, "loss": 1.8863, "step": 448500 }, { "FLOPS loss": 0.06345243006944656, "L0_d": 790.52, "MLM loss": 1.9979722499847412, "epoch": 9.56, "step": 448999 }, { "epoch": 9.56, "learning_rate": 1.0447346938775511e-05, "loss": 1.8843, "step": 449000 }, { "FLOPS loss": 0.06612099707126617, "L0_d": 640.97, "MLM loss": 1.8012644052505493, "epoch": 9.57, "step": 449499 }, { "epoch": 9.57, "learning_rate": 1.034530612244898e-05, "loss": 1.8834, "step": 449500 }, { "FLOPS loss": 0.06571657210588455, "L0_d": 709.7, "MLM loss": 1.732346773147583, "epoch": 9.58, "step": 449999 }, { "epoch": 9.58, "learning_rate": 1.024326530612245e-05, "loss": 1.8825, "step": 450000 }, { "FLOPS loss": 0.09524022042751312, "L0_d": 713.88, "MLM loss": 1.8579025268554688, "epoch": 9.59, "step": 450499 }, { "epoch": 9.59, "learning_rate": 1.0141428571428573e-05, "loss": 1.8793, "step": 450500 }, { "FLOPS loss": 0.08708564192056656, "L0_d": 764.3, "MLM loss": 1.7488106489181519, "epoch": 9.6, "step": 450999 }, { "epoch": 9.6, "learning_rate": 1.0039387755102041e-05, "loss": 1.8842, "step": 451000 }, { "FLOPS loss": 0.06419456005096436, "L0_d": 609.41, "MLM loss": 1.728226661682129, "epoch": 9.61, "step": 451499 }, { "epoch": 9.61, "learning_rate": 9.93734693877551e-06, "loss": 1.8823, "step": 451500 }, { "FLOPS loss": 0.07620800286531448, "L0_d": 819.53, "MLM loss": 1.7481298446655273, "epoch": 9.63, "step": 451999 }, { "epoch": 9.63, "learning_rate": 9.83530612244898e-06, "loss": 1.8809, "step": 452000 }, { "FLOPS loss": 0.07101784646511078, "L0_d": 665.31, "MLM loss": 1.7855209112167358, "epoch": 9.64, "step": 452499 }, { "epoch": 9.64, "learning_rate": 9.733469387755103e-06, "loss": 1.8854, "step": 452500 }, { "FLOPS loss": 0.07623369246721268, "L0_d": 771.8, "MLM loss": 1.7835557460784912, "epoch": 9.65, "step": 452999 }, { "epoch": 9.65, "learning_rate": 9.631428571428573e-06, "loss": 1.8816, "step": 453000 }, { "FLOPS loss": 0.08137229084968567, "L0_d": 841.5, "MLM loss": 1.8653662204742432, "epoch": 9.66, "step": 453499 }, { "epoch": 9.66, "learning_rate": 9.52938775510204e-06, "loss": 1.8856, "step": 453500 }, { "FLOPS loss": 0.0767243355512619, "L0_d": 1113.84, "MLM loss": 1.8295722007751465, "epoch": 9.67, "step": 453999 }, { "epoch": 9.67, "learning_rate": 9.42734693877551e-06, "loss": 1.8776, "step": 454000 }, { "FLOPS loss": 0.078528992831707, "L0_d": 882.05, "MLM loss": 1.8933274745941162, "epoch": 9.68, "step": 454499 }, { "epoch": 9.68, "learning_rate": 9.325510204081633e-06, "loss": 1.8817, "step": 454500 }, { "FLOPS loss": 0.07128248363733292, "L0_d": 683.69, "MLM loss": 1.6863057613372803, "epoch": 9.69, "step": 454999 }, { "epoch": 9.69, "learning_rate": 9.223469387755103e-06, "loss": 1.8791, "step": 455000 }, { "FLOPS loss": 0.08068500459194183, "L0_d": 753.53, "MLM loss": 1.7771949768066406, "epoch": 9.7, "step": 455499 }, { "epoch": 9.7, "learning_rate": 9.121428571428572e-06, "loss": 1.8827, "step": 455500 }, { "FLOPS loss": 0.06115632876753807, "L0_d": 648.83, "MLM loss": 1.7620165348052979, "epoch": 9.71, "step": 455999 }, { "epoch": 9.71, "learning_rate": 9.01938775510204e-06, "loss": 1.8839, "step": 456000 }, { "FLOPS loss": 0.08328636735677719, "L0_d": 747.77, "MLM loss": 1.7784515619277954, "epoch": 9.72, "step": 456499 }, { "epoch": 9.72, "learning_rate": 8.91734693877551e-06, "loss": 1.8827, "step": 456500 }, { "FLOPS loss": 0.07326602935791016, "L0_d": 676.11, "MLM loss": 1.8248202800750732, "epoch": 9.73, "step": 456999 }, { "epoch": 9.73, "learning_rate": 8.815510204081633e-06, "loss": 1.8811, "step": 457000 }, { "FLOPS loss": 0.06962653249502182, "L0_d": 670.75, "MLM loss": 1.9677387475967407, "epoch": 9.74, "step": 457499 }, { "epoch": 9.74, "learning_rate": 8.713469387755102e-06, "loss": 1.8818, "step": 457500 }, { "FLOPS loss": 0.0688956156373024, "L0_d": 924.66, "MLM loss": 1.8603427410125732, "epoch": 9.75, "step": 457999 }, { "epoch": 9.75, "learning_rate": 8.611428571428572e-06, "loss": 1.8859, "step": 458000 }, { "FLOPS loss": 0.07736175507307053, "L0_d": 1061.88, "MLM loss": 1.8138039112091064, "epoch": 9.76, "step": 458499 }, { "epoch": 9.76, "learning_rate": 8.509387755102042e-06, "loss": 1.8817, "step": 458500 }, { "FLOPS loss": 0.0671883374452591, "L0_d": 722.67, "MLM loss": 1.7381740808486938, "epoch": 9.77, "step": 458999 }, { "epoch": 9.77, "learning_rate": 8.407551020408164e-06, "loss": 1.8798, "step": 459000 }, { "FLOPS loss": 0.07179486751556396, "L0_d": 825.41, "MLM loss": 1.8373209238052368, "epoch": 9.79, "step": 459499 }, { "epoch": 9.79, "learning_rate": 8.305510204081632e-06, "loss": 1.8839, "step": 459500 }, { "FLOPS loss": 0.059714049100875854, "L0_d": 530.77, "MLM loss": 1.7862880229949951, "epoch": 9.8, "step": 459999 }, { "epoch": 9.8, "learning_rate": 8.203469387755102e-06, "loss": 1.8817, "step": 460000 }, { "FLOPS loss": 0.0783819779753685, "L0_d": 960.45, "MLM loss": 1.8646221160888672, "epoch": 9.81, "step": 460499 }, { "epoch": 9.81, "learning_rate": 8.101428571428572e-06, "loss": 1.879, "step": 460500 }, { "FLOPS loss": 0.06765973567962646, "L0_d": 635.81, "MLM loss": 1.9023945331573486, "epoch": 9.82, "step": 460999 }, { "epoch": 9.82, "learning_rate": 7.999591836734694e-06, "loss": 1.8803, "step": 461000 }, { "FLOPS loss": 0.06713356822729111, "L0_d": 643.22, "MLM loss": 1.6709318161010742, "epoch": 9.83, "step": 461499 }, { "epoch": 9.83, "learning_rate": 7.897551020408164e-06, "loss": 1.8788, "step": 461500 }, { "FLOPS loss": 0.06832224130630493, "L0_d": 696.81, "MLM loss": 1.7683454751968384, "epoch": 9.84, "step": 461999 }, { "epoch": 9.84, "learning_rate": 7.795510204081632e-06, "loss": 1.8791, "step": 462000 }, { "FLOPS loss": 0.08137806504964828, "L0_d": 629.3, "MLM loss": 1.7359639406204224, "epoch": 9.85, "step": 462499 }, { "epoch": 9.85, "learning_rate": 7.693469387755102e-06, "loss": 1.8806, "step": 462500 }, { "FLOPS loss": 0.07944727689027786, "L0_d": 1020.34, "MLM loss": 1.6010856628417969, "epoch": 9.86, "step": 462999 }, { "epoch": 9.86, "learning_rate": 7.591428571428572e-06, "loss": 1.8793, "step": 463000 }, { "FLOPS loss": 0.06799176335334778, "L0_d": 662.75, "MLM loss": 1.7740750312805176, "epoch": 9.87, "step": 463499 }, { "epoch": 9.87, "learning_rate": 7.489387755102041e-06, "loss": 1.8779, "step": 463500 }, { "FLOPS loss": 0.08271846920251846, "L0_d": 764.22, "MLM loss": 1.8342242240905762, "epoch": 9.88, "step": 463999 }, { "epoch": 9.88, "learning_rate": 7.387551020408163e-06, "loss": 1.876, "step": 464000 }, { "FLOPS loss": 0.0854814201593399, "L0_d": 922.66, "MLM loss": 1.876578450202942, "epoch": 9.89, "step": 464499 }, { "epoch": 9.89, "learning_rate": 7.285510204081633e-06, "loss": 1.8791, "step": 464500 }, { "FLOPS loss": 0.10036221146583557, "L0_d": 907.89, "MLM loss": 1.777255654335022, "epoch": 9.9, "step": 464999 }, { "epoch": 9.9, "learning_rate": 7.183469387755103e-06, "loss": 1.8797, "step": 465000 }, { "FLOPS loss": 0.06955206394195557, "L0_d": 749.52, "MLM loss": 1.621423363685608, "epoch": 9.91, "step": 465499 }, { "epoch": 9.91, "learning_rate": 7.0814285714285725e-06, "loss": 1.876, "step": 465500 }, { "FLOPS loss": 0.06892334669828415, "L0_d": 697.23, "MLM loss": 1.743448257446289, "epoch": 9.92, "step": 465999 }, { "epoch": 9.92, "learning_rate": 6.979591836734695e-06, "loss": 1.8819, "step": 466000 }, { "FLOPS loss": 0.0751122310757637, "L0_d": 797.48, "MLM loss": 1.9870507717132568, "epoch": 9.93, "step": 466499 }, { "epoch": 9.93, "learning_rate": 6.877551020408164e-06, "loss": 1.8828, "step": 466500 }, { "FLOPS loss": 0.08745686709880829, "L0_d": 953.25, "MLM loss": 1.8826556205749512, "epoch": 9.95, "step": 466999 }, { "epoch": 9.95, "learning_rate": 6.775510204081633e-06, "loss": 1.878, "step": 467000 }, { "FLOPS loss": 0.07517757266759872, "L0_d": 826.22, "MLM loss": 1.7692055702209473, "epoch": 9.96, "step": 467499 }, { "epoch": 9.96, "learning_rate": 6.673469387755102e-06, "loss": 1.8798, "step": 467500 }, { "FLOPS loss": 0.08363378793001175, "L0_d": 745.77, "MLM loss": 1.752866506576538, "epoch": 9.97, "step": 467999 }, { "epoch": 9.97, "learning_rate": 6.571632653061224e-06, "loss": 1.8782, "step": 468000 }, { "FLOPS loss": 0.05404847860336304, "L0_d": 604.16, "MLM loss": 1.909287691116333, "epoch": 9.98, "step": 468499 }, { "epoch": 9.98, "learning_rate": 6.469591836734694e-06, "loss": 1.8779, "step": 468500 }, { "FLOPS loss": 0.09000225365161896, "L0_d": 819.59, "MLM loss": 1.7899792194366455, "epoch": 9.99, "step": 468999 }, { "epoch": 9.99, "learning_rate": 6.367551020408164e-06, "loss": 1.8752, "step": 469000 }, { "FLOPS loss": 0.07309328764677048, "L0_d": 703.8, "MLM loss": 1.8317029476165771, "epoch": 10.0, "step": 469499 }, { "epoch": 10.0, "learning_rate": 6.265510204081633e-06, "loss": 1.8769, "step": 469500 }, { "FLOPS loss": 0.06383686512708664, "L0_d": 687.62, "MLM loss": 1.7126272916793823, "epoch": 10.01, "step": 469999 }, { "epoch": 10.01, "learning_rate": 6.163469387755102e-06, "loss": 1.8771, "step": 470000 }, { "FLOPS loss": 0.07466043531894684, "L0_d": 799.62, "MLM loss": 1.8500142097473145, "epoch": 10.02, "step": 470499 }, { "epoch": 10.02, "learning_rate": 6.061428571428571e-06, "loss": 1.8747, "step": 470500 }, { "FLOPS loss": 0.07076152414083481, "L0_d": 837.88, "MLM loss": 1.8527822494506836, "epoch": 10.03, "step": 470999 }, { "epoch": 10.03, "learning_rate": 5.959591836734694e-06, "loss": 1.8736, "step": 471000 }, { "FLOPS loss": 0.07877156138420105, "L0_d": 717.0, "MLM loss": 1.727238416671753, "epoch": 10.04, "step": 471499 }, { "epoch": 10.04, "learning_rate": 5.857551020408163e-06, "loss": 1.8719, "step": 471500 }, { "FLOPS loss": 0.07525794953107834, "L0_d": 743.8, "MLM loss": 1.7471095323562622, "epoch": 10.05, "step": 471999 }, { "epoch": 10.05, "learning_rate": 5.755510204081633e-06, "loss": 1.8739, "step": 472000 }, { "FLOPS loss": 0.06217565760016441, "L0_d": 702.58, "MLM loss": 1.8815577030181885, "epoch": 10.06, "step": 472499 }, { "epoch": 10.06, "learning_rate": 5.653469387755102e-06, "loss": 1.8783, "step": 472500 }, { "FLOPS loss": 0.08705900609493256, "L0_d": 895.94, "MLM loss": 1.7565627098083496, "epoch": 10.07, "step": 472999 }, { "epoch": 10.07, "learning_rate": 5.5516326530612245e-06, "loss": 1.8748, "step": 473000 }, { "FLOPS loss": 0.056544363498687744, "L0_d": 588.73, "MLM loss": 1.7726197242736816, "epoch": 10.08, "step": 473499 }, { "epoch": 10.08, "learning_rate": 5.449591836734694e-06, "loss": 1.8741, "step": 473500 }, { "FLOPS loss": 0.06269951909780502, "L0_d": 648.5, "MLM loss": 1.9060299396514893, "epoch": 10.09, "step": 473999 }, { "epoch": 10.09, "learning_rate": 5.347551020408163e-06, "loss": 1.8779, "step": 474000 }, { "FLOPS loss": 0.08251946419477463, "L0_d": 952.05, "MLM loss": 1.7872275114059448, "epoch": 10.1, "step": 474499 }, { "epoch": 10.1, "learning_rate": 5.245510204081633e-06, "loss": 1.8746, "step": 474500 }, { "FLOPS loss": 0.07168962806463242, "L0_d": 652.42, "MLM loss": 1.715721845626831, "epoch": 10.12, "step": 474999 }, { "epoch": 10.12, "learning_rate": 5.143469387755103e-06, "loss": 1.8689, "step": 475000 }, { "FLOPS loss": 0.06996005773544312, "L0_d": 860.59, "MLM loss": 1.854485034942627, "epoch": 10.13, "step": 475499 }, { "epoch": 10.13, "learning_rate": 5.041632653061225e-06, "loss": 1.8754, "step": 475500 }, { "FLOPS loss": 0.0643070861697197, "L0_d": 657.98, "MLM loss": 1.689789056777954, "epoch": 10.14, "step": 475999 }, { "epoch": 10.14, "learning_rate": 4.939591836734694e-06, "loss": 1.8756, "step": 476000 }, { "FLOPS loss": 0.08355584740638733, "L0_d": 897.23, "MLM loss": 1.758540153503418, "epoch": 10.15, "step": 476499 }, { "epoch": 10.15, "learning_rate": 4.837551020408163e-06, "loss": 1.8762, "step": 476500 }, { "FLOPS loss": 0.08008190989494324, "L0_d": 704.61, "MLM loss": 1.6079193353652954, "epoch": 10.16, "step": 476999 }, { "epoch": 10.16, "learning_rate": 4.735510204081633e-06, "loss": 1.8762, "step": 477000 }, { "FLOPS loss": 0.06911767274141312, "L0_d": 871.81, "MLM loss": 1.8158210515975952, "epoch": 10.17, "step": 477499 }, { "epoch": 10.17, "learning_rate": 4.633673469387755e-06, "loss": 1.8744, "step": 477500 }, { "FLOPS loss": 0.08581630885601044, "L0_d": 720.11, "MLM loss": 1.918521523475647, "epoch": 10.18, "step": 477999 }, { "epoch": 10.18, "learning_rate": 4.531632653061225e-06, "loss": 1.8723, "step": 478000 }, { "FLOPS loss": 0.09056857973337173, "L0_d": 878.39, "MLM loss": 1.8207356929779053, "epoch": 10.19, "step": 478499 }, { "epoch": 10.19, "learning_rate": 4.429591836734695e-06, "loss": 1.8751, "step": 478500 }, { "FLOPS loss": 0.08066496253013611, "L0_d": 708.09, "MLM loss": 1.8024837970733643, "epoch": 10.2, "step": 478999 }, { "epoch": 10.2, "learning_rate": 4.327551020408163e-06, "loss": 1.873, "step": 479000 }, { "FLOPS loss": 0.08622293174266815, "L0_d": 819.22, "MLM loss": 1.7358485460281372, "epoch": 10.21, "step": 479499 }, { "epoch": 10.21, "learning_rate": 4.225714285714286e-06, "loss": 1.878, "step": 479500 }, { "FLOPS loss": 0.08763483911752701, "L0_d": 772.67, "MLM loss": 1.7484935522079468, "epoch": 10.22, "step": 479999 }, { "epoch": 10.22, "learning_rate": 4.123673469387755e-06, "loss": 1.8734, "step": 480000 }, { "FLOPS loss": 0.06542215496301651, "L0_d": 816.7, "MLM loss": 1.807370662689209, "epoch": 10.23, "step": 480499 }, { "epoch": 10.23, "learning_rate": 4.021632653061225e-06, "loss": 1.874, "step": 480500 }, { "FLOPS loss": 0.06360576301813126, "L0_d": 633.91, "MLM loss": 1.731799840927124, "epoch": 10.24, "step": 480999 }, { "epoch": 10.24, "learning_rate": 3.9195918367346945e-06, "loss": 1.8732, "step": 481000 }, { "FLOPS loss": 0.06134741008281708, "L0_d": 653.84, "MLM loss": 1.7690579891204834, "epoch": 10.25, "step": 481499 }, { "epoch": 10.25, "learning_rate": 3.817755102040817e-06, "loss": 1.8712, "step": 481500 }, { "FLOPS loss": 0.06777947396039963, "L0_d": 1139.48, "MLM loss": 1.770931601524353, "epoch": 10.26, "step": 481999 }, { "epoch": 10.26, "learning_rate": 3.7157142857142854e-06, "loss": 1.8771, "step": 482000 }, { "FLOPS loss": 0.05449703335762024, "L0_d": 670.72, "MLM loss": 1.8757489919662476, "epoch": 10.28, "step": 482499 }, { "epoch": 10.28, "learning_rate": 3.613673469387755e-06, "loss": 1.8739, "step": 482500 }, { "FLOPS loss": 0.06846970319747925, "L0_d": 700.67, "MLM loss": 1.851085901260376, "epoch": 10.29, "step": 482999 }, { "epoch": 10.29, "learning_rate": 3.5116326530612245e-06, "loss": 1.871, "step": 483000 }, { "FLOPS loss": 0.0666121169924736, "L0_d": 764.92, "MLM loss": 1.8932390213012695, "epoch": 10.3, "step": 483499 }, { "epoch": 10.3, "learning_rate": 3.4095918367346943e-06, "loss": 1.8723, "step": 483500 }, { "FLOPS loss": 0.06457451730966568, "L0_d": 554.03, "MLM loss": 1.8126370906829834, "epoch": 10.31, "step": 483999 }, { "epoch": 10.31, "learning_rate": 3.3077551020408167e-06, "loss": 1.8745, "step": 484000 }, { "FLOPS loss": 0.09475447982549667, "L0_d": 946.62, "MLM loss": 1.9367070198059082, "epoch": 10.32, "step": 484499 }, { "epoch": 10.32, "learning_rate": 3.205714285714286e-06, "loss": 1.8732, "step": 484500 }, { "FLOPS loss": 0.07273074239492416, "L0_d": 770.86, "MLM loss": 1.8612580299377441, "epoch": 10.33, "step": 484999 }, { "epoch": 10.33, "learning_rate": 3.1036734693877554e-06, "loss": 1.8739, "step": 485000 }, { "FLOPS loss": 0.06570863723754883, "L0_d": 606.78, "MLM loss": 1.8435473442077637, "epoch": 10.34, "step": 485499 }, { "epoch": 10.34, "learning_rate": 3.0016326530612248e-06, "loss": 1.8713, "step": 485500 }, { "FLOPS loss": 0.07231251895427704, "L0_d": 614.42, "MLM loss": 1.7718569040298462, "epoch": 10.35, "step": 485999 }, { "epoch": 10.35, "learning_rate": 2.899795918367347e-06, "loss": 1.8732, "step": 486000 }, { "FLOPS loss": 0.06499864161014557, "L0_d": 571.12, "MLM loss": 1.8375295400619507, "epoch": 10.36, "step": 486499 }, { "epoch": 10.36, "learning_rate": 2.7977551020408165e-06, "loss": 1.8758, "step": 486500 }, { "FLOPS loss": 0.06405822932720184, "L0_d": 1054.19, "MLM loss": 1.9204262495040894, "epoch": 10.37, "step": 486999 }, { "epoch": 10.37, "learning_rate": 2.695714285714286e-06, "loss": 1.8758, "step": 487000 }, { "FLOPS loss": 0.07824577391147614, "L0_d": 723.75, "MLM loss": 1.8445980548858643, "epoch": 10.38, "step": 487499 }, { "epoch": 10.38, "learning_rate": 2.5936734693877552e-06, "loss": 1.8742, "step": 487500 }, { "FLOPS loss": 0.07497908174991608, "L0_d": 718.8, "MLM loss": 1.6689207553863525, "epoch": 10.39, "step": 487999 }, { "epoch": 10.39, "learning_rate": 2.4916326530612246e-06, "loss": 1.8707, "step": 488000 }, { "FLOPS loss": 0.07021187990903854, "L0_d": 646.23, "MLM loss": 1.7981057167053223, "epoch": 10.4, "step": 488499 }, { "epoch": 10.4, "learning_rate": 2.389795918367347e-06, "loss": 1.8736, "step": 488500 }, { "FLOPS loss": 0.07197225093841553, "L0_d": 839.08, "MLM loss": 1.9683432579040527, "epoch": 10.41, "step": 488999 }, { "epoch": 10.41, "learning_rate": 2.2877551020408167e-06, "loss": 1.8734, "step": 489000 }, { "FLOPS loss": 0.06169842928647995, "L0_d": 680.11, "MLM loss": 1.848118543624878, "epoch": 10.42, "step": 489499 }, { "epoch": 10.42, "learning_rate": 2.1857142857142857e-06, "loss": 1.8705, "step": 489500 }, { "FLOPS loss": 0.06455899775028229, "L0_d": 807.06, "MLM loss": 1.9502975940704346, "epoch": 10.43, "step": 489999 }, { "epoch": 10.43, "learning_rate": 2.0836734693877554e-06, "loss": 1.8699, "step": 490000 }, { "FLOPS loss": 0.08180254697799683, "L0_d": 711.53, "MLM loss": 1.8077306747436523, "epoch": 10.45, "step": 490499 }, { "epoch": 10.45, "learning_rate": 1.9816326530612244e-06, "loss": 1.8721, "step": 490500 }, { "FLOPS loss": 0.06761618703603745, "L0_d": 811.06, "MLM loss": 1.7293885946273804, "epoch": 10.46, "step": 490999 }, { "epoch": 10.46, "learning_rate": 1.879795918367347e-06, "loss": 1.8726, "step": 491000 }, { "FLOPS loss": 0.08343058079481125, "L0_d": 1113.06, "MLM loss": 1.7562888860702515, "epoch": 10.47, "step": 491499 }, { "epoch": 10.47, "learning_rate": 1.7777551020408165e-06, "loss": 1.8713, "step": 491500 }, { "FLOPS loss": 0.07647604495286942, "L0_d": 812.47, "MLM loss": 1.814285159111023, "epoch": 10.48, "step": 491999 }, { "epoch": 10.48, "learning_rate": 1.6757142857142857e-06, "loss": 1.8729, "step": 492000 }, { "FLOPS loss": 0.0742335394024849, "L0_d": 725.94, "MLM loss": 1.7169965505599976, "epoch": 10.49, "step": 492499 }, { "epoch": 10.49, "learning_rate": 1.5736734693877552e-06, "loss": 1.8707, "step": 492500 }, { "FLOPS loss": 0.07749656587839127, "L0_d": 740.7, "MLM loss": 1.8546206951141357, "epoch": 10.5, "step": 492999 }, { "epoch": 10.5, "learning_rate": 1.4718367346938776e-06, "loss": 1.8758, "step": 493000 }, { "FLOPS loss": 0.05815978720784187, "L0_d": 632.03, "MLM loss": 1.906503438949585, "epoch": 10.51, "step": 493499 }, { "epoch": 10.51, "learning_rate": 1.369795918367347e-06, "loss": 1.8754, "step": 493500 }, { "FLOPS loss": 0.05534592270851135, "L0_d": 531.83, "MLM loss": 1.8600013256072998, "epoch": 10.52, "step": 493999 }, { "epoch": 10.52, "learning_rate": 1.2677551020408163e-06, "loss": 1.8691, "step": 494000 }, { "FLOPS loss": 0.07069820165634155, "L0_d": 716.92, "MLM loss": 1.6526392698287964, "epoch": 10.53, "step": 494499 }, { "epoch": 10.53, "learning_rate": 1.1657142857142857e-06, "loss": 1.8698, "step": 494500 }, { "FLOPS loss": 0.08306233584880829, "L0_d": 738.41, "MLM loss": 1.822631597518921, "epoch": 10.54, "step": 494999 }, { "epoch": 10.54, "learning_rate": 1.0638775510204083e-06, "loss": 1.8754, "step": 495000 }, { "FLOPS loss": 0.0737542062997818, "L0_d": 657.59, "MLM loss": 1.7241982221603394, "epoch": 10.55, "step": 495499 }, { "epoch": 10.55, "learning_rate": 9.618367346938776e-07, "loss": 1.8712, "step": 495500 }, { "FLOPS loss": 0.08165741711854935, "L0_d": 817.86, "MLM loss": 1.7989674806594849, "epoch": 10.56, "step": 495999 }, { "epoch": 10.56, "learning_rate": 8.59795918367347e-07, "loss": 1.8706, "step": 496000 }, { "FLOPS loss": 0.0812465101480484, "L0_d": 775.12, "MLM loss": 1.8955512046813965, "epoch": 10.57, "step": 496499 }, { "epoch": 10.57, "learning_rate": 7.577551020408163e-07, "loss": 1.8724, "step": 496500 }, { "FLOPS loss": 0.08214175701141357, "L0_d": 734.98, "MLM loss": 1.8664565086364746, "epoch": 10.58, "step": 496999 }, { "epoch": 10.58, "learning_rate": 6.557142857142857e-07, "loss": 1.8723, "step": 497000 }, { "FLOPS loss": 0.07855507731437683, "L0_d": 850.05, "MLM loss": 1.7129387855529785, "epoch": 10.59, "step": 497499 }, { "epoch": 10.59, "learning_rate": 5.538775510204082e-07, "loss": 1.8718, "step": 497500 }, { "FLOPS loss": 0.069860078394413, "L0_d": 696.36, "MLM loss": 1.6870092153549194, "epoch": 10.61, "step": 497999 }, { "epoch": 10.61, "learning_rate": 4.5183673469387754e-07, "loss": 1.8679, "step": 498000 }, { "FLOPS loss": 0.065401092171669, "L0_d": 664.92, "MLM loss": 1.7763365507125854, "epoch": 10.62, "step": 498499 }, { "epoch": 10.62, "learning_rate": 3.4979591836734695e-07, "loss": 1.8715, "step": 498500 }, { "FLOPS loss": 0.07126881182193756, "L0_d": 922.78, "MLM loss": 1.885299563407898, "epoch": 10.63, "step": 498999 }, { "epoch": 10.63, "learning_rate": 2.477551020408163e-07, "loss": 1.8741, "step": 499000 }, { "FLOPS loss": 0.07045704871416092, "L0_d": 685.06, "MLM loss": 1.7598927021026611, "epoch": 10.64, "step": 499499 }, { "epoch": 10.64, "learning_rate": 1.457142857142857e-07, "loss": 1.8754, "step": 499500 }, { "FLOPS loss": 0.07906313985586166, "L0_d": 1157.08, "MLM loss": 1.733157753944397, "epoch": 10.65, "step": 499999 }, { "epoch": 10.65, "learning_rate": 4.3877551020408164e-08, "loss": 1.8715, "step": 500000 }, { "epoch": 10.65, "step": 500001, "total_flos": 8.484077462446146e+18, "train_loss": 3.6746194780010343e-06, "train_runtime": 301.9815, "train_samples_per_second": 423867.085, "train_steps_per_second": 1655.731 } ], "max_steps": 500000, "num_train_epochs": 11, "total_flos": 8.484077462446146e+18, "trial_name": null, "trial_params": null }