{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.548896879300395, "global_step": 500000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "FLOPS loss": 9.565525397192687e-05, "L0_d": 24056.38, "MLM loss": 9.09946060180664, "epoch": 0.01, "step": 499 }, { "epoch": 0.01, "learning_rate": 5e-06, "loss": 9.5975, "step": 500 }, { "FLOPS loss": 0.000949572422541678, "L0_d": 28601.91, "MLM loss": 8.23979377746582, "epoch": 0.01, "step": 999 }, { "epoch": 0.01, "learning_rate": 1e-05, "loss": 8.5554, "step": 1000 }, { "FLOPS loss": 0.0030820632819086313, "L0_d": 29087.12, "MLM loss": 8.02900505065918, "epoch": 0.02, "step": 1499 }, { "epoch": 0.02, "learning_rate": 1.5e-05, "loss": 8.0914, "step": 1500 }, { "FLOPS loss": 0.006640450097620487, "L0_d": 29219.52, "MLM loss": 7.837193489074707, "epoch": 0.02, "step": 1999 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 7.9209, "step": 2000 }, { "FLOPS loss": 0.011471221223473549, "L0_d": 29325.81, "MLM loss": 7.7792205810546875, "epoch": 0.03, "step": 2499 }, { "epoch": 0.03, "learning_rate": 2.5e-05, "loss": 7.813, "step": 2500 }, { "FLOPS loss": 0.01776166260242462, "L0_d": 28927.25, "MLM loss": 7.739745616912842, "epoch": 0.03, "step": 2999 }, { "epoch": 0.03, "learning_rate": 3e-05, "loss": 7.7168, "step": 3000 }, { "FLOPS loss": 0.025191370397806168, "L0_d": 28969.59, "MLM loss": 7.602524757385254, "epoch": 0.04, "step": 3499 }, { "epoch": 0.04, "learning_rate": 3.5e-05, "loss": 7.6089, "step": 3500 }, { "FLOPS loss": 0.03225690871477127, "L0_d": 28825.02, "MLM loss": 7.034076690673828, "epoch": 0.04, "step": 3999 }, { "epoch": 0.04, "learning_rate": 4e-05, "loss": 7.3594, "step": 4000 }, { "FLOPS loss": 0.041860684752464294, "L0_d": 28864.8, "MLM loss": 6.668835639953613, "epoch": 0.05, "step": 4499 }, { "epoch": 0.05, "learning_rate": 4.5e-05, "loss": 6.9181, "step": 4500 }, { "FLOPS loss": 0.055579572916030884, "L0_d": 28292.02, "MLM loss": 6.369210243225098, "epoch": 0.06, "step": 4999 }, { "epoch": 0.06, "learning_rate": 5e-05, "loss": 6.4982, "step": 5000 }, { "FLOPS loss": 0.06552041321992874, "L0_d": 27784.97, "MLM loss": 6.052916049957275, "epoch": 0.06, "step": 5499 }, { "epoch": 0.06, "learning_rate": 5.500000000000001e-05, "loss": 6.1667, "step": 5500 }, { "FLOPS loss": 0.06926894187927246, "L0_d": 27378.45, "MLM loss": 5.649182319641113, "epoch": 0.07, "step": 5999 }, { "epoch": 0.07, "learning_rate": 6e-05, "loss": 5.8862, "step": 6000 }, { "FLOPS loss": 0.06021267920732498, "L0_d": 25159.61, "MLM loss": 5.459830284118652, "epoch": 0.07, "step": 6499 }, { "epoch": 0.07, "learning_rate": 6.500000000000001e-05, "loss": 5.6134, "step": 6500 }, { "FLOPS loss": 0.049352116882801056, "L0_d": 21979.23, "MLM loss": 5.2327165603637695, "epoch": 0.08, "step": 6999 }, { "epoch": 0.08, "learning_rate": 7e-05, "loss": 5.3441, "step": 7000 }, { "FLOPS loss": 0.03712235763669014, "L0_d": 19156.66, "MLM loss": 4.846562385559082, "epoch": 0.08, "step": 7499 }, { "epoch": 0.08, "learning_rate": 7.500000000000001e-05, "loss": 5.0493, "step": 7500 }, { "FLOPS loss": 0.038594800978899, "L0_d": 17594.55, "MLM loss": 4.341317176818848, "epoch": 0.09, "step": 7999 }, { "epoch": 0.09, "learning_rate": 8e-05, "loss": 4.7729, "step": 8000 }, { "FLOPS loss": 0.034570008516311646, "L0_d": 16728.05, "MLM loss": 4.398600101470947, "epoch": 0.09, "step": 8499 }, { "epoch": 0.09, "learning_rate": 8.5e-05, "loss": 4.5531, "step": 8500 }, { "FLOPS loss": 0.028350943699479103, "L0_d": 13310.48, "MLM loss": 4.156666278839111, "epoch": 0.1, "step": 8999 }, { "epoch": 0.1, "learning_rate": 9e-05, "loss": 4.3907, "step": 9000 }, { "FLOPS loss": 0.03558973968029022, "L0_d": 13780.98, "MLM loss": 4.288907527923584, "epoch": 0.11, "step": 9499 }, { "epoch": 0.11, "learning_rate": 9.499e-05, "loss": 4.2496, "step": 9500 }, { "FLOPS loss": 0.030856303870677948, "L0_d": 12326.91, "MLM loss": 4.153857231140137, "epoch": 0.11, "step": 9999 }, { "epoch": 0.11, "learning_rate": 9.999000000000001e-05, "loss": 4.143, "step": 10000 }, { "FLOPS loss": 0.026871612295508385, "L0_d": 10626.25, "MLM loss": 3.8834831714630127, "epoch": 0.12, "step": 10499 }, { "epoch": 0.12, "learning_rate": 9.989816326530613e-05, "loss": 4.0389, "step": 10500 }, { "FLOPS loss": 0.028217237442731857, "L0_d": 10158.17, "MLM loss": 3.9724059104919434, "epoch": 0.12, "step": 10999 }, { "epoch": 0.12, "learning_rate": 9.97961224489796e-05, "loss": 3.9454, "step": 11000 }, { "FLOPS loss": 0.02496383525431156, "L0_d": 8790.8, "MLM loss": 3.530895471572876, "epoch": 0.13, "step": 11499 }, { "epoch": 0.13, "learning_rate": 9.969428571428572e-05, "loss": 3.8667, "step": 11500 }, { "FLOPS loss": 0.027112236246466637, "L0_d": 8648.06, "MLM loss": 3.668597936630249, "epoch": 0.13, "step": 11999 }, { "epoch": 0.13, "learning_rate": 9.959224489795919e-05, "loss": 3.8018, "step": 12000 }, { "FLOPS loss": 0.020357422530651093, "L0_d": 6906.2, "MLM loss": 3.650007486343384, "epoch": 0.14, "step": 12499 }, { "epoch": 0.14, "learning_rate": 9.949020408163265e-05, "loss": 3.7269, "step": 12500 }, { "FLOPS loss": 0.017766710370779037, "L0_d": 6252.44, "MLM loss": 3.341287612915039, "epoch": 0.14, "step": 12999 }, { "epoch": 0.14, "learning_rate": 9.938816326530612e-05, "loss": 3.6711, "step": 13000 }, { "FLOPS loss": 0.022355731576681137, "L0_d": 6912.27, "MLM loss": 3.6506505012512207, "epoch": 0.15, "step": 13499 }, { "epoch": 0.15, "learning_rate": 9.928632653061225e-05, "loss": 3.6099, "step": 13500 }, { "FLOPS loss": 0.027182623744010925, "L0_d": 6369.19, "MLM loss": 3.3572659492492676, "epoch": 0.16, "step": 13999 }, { "epoch": 0.16, "learning_rate": 9.918448979591837e-05, "loss": 3.5653, "step": 14000 }, { "FLOPS loss": 0.01985126920044422, "L0_d": 5742.23, "MLM loss": 3.572293996810913, "epoch": 0.16, "step": 14499 }, { "epoch": 0.16, "learning_rate": 9.908244897959185e-05, "loss": 3.5203, "step": 14500 }, { "FLOPS loss": 0.023559996858239174, "L0_d": 5665.14, "MLM loss": 3.5261945724487305, "epoch": 0.17, "step": 14999 }, { "epoch": 0.17, "learning_rate": 9.898040816326532e-05, "loss": 3.4748, "step": 15000 }, { "FLOPS loss": 0.0277397520840168, "L0_d": 6120.59, "MLM loss": 3.3022494316101074, "epoch": 0.17, "step": 15499 }, { "epoch": 0.17, "learning_rate": 9.887836734693878e-05, "loss": 3.4441, "step": 15500 }, { "FLOPS loss": 0.01913982443511486, "L0_d": 4547.16, "MLM loss": 3.3947341442108154, "epoch": 0.18, "step": 15999 }, { "epoch": 0.18, "learning_rate": 9.877632653061225e-05, "loss": 3.4028, "step": 16000 }, { "FLOPS loss": 0.02665124647319317, "L0_d": 5248.73, "MLM loss": 3.26316499710083, "epoch": 0.18, "step": 16499 }, { "epoch": 0.18, "learning_rate": 9.867448979591837e-05, "loss": 3.376, "step": 16500 }, { "FLOPS loss": 0.0213327594101429, "L0_d": 4548.09, "MLM loss": 3.4196789264678955, "epoch": 0.19, "step": 16999 }, { "epoch": 0.19, "learning_rate": 9.857244897959183e-05, "loss": 3.343, "step": 17000 }, { "FLOPS loss": 0.01892854832112789, "L0_d": 3681.11, "MLM loss": 3.2202186584472656, "epoch": 0.19, "step": 17499 }, { "epoch": 0.19, "learning_rate": 9.847040816326531e-05, "loss": 3.3123, "step": 17500 }, { "FLOPS loss": 0.02094082348048687, "L0_d": 3671.39, "MLM loss": 3.3424830436706543, "epoch": 0.2, "step": 17999 }, { "epoch": 0.2, "learning_rate": 9.836836734693879e-05, "loss": 3.2883, "step": 18000 }, { "FLOPS loss": 0.02323000319302082, "L0_d": 3737.8, "MLM loss": 3.143867015838623, "epoch": 0.21, "step": 18499 }, { "epoch": 0.21, "learning_rate": 9.826632653061225e-05, "loss": 3.2642, "step": 18500 }, { "FLOPS loss": 0.030227798968553543, "L0_d": 4385.75, "MLM loss": 3.2854490280151367, "epoch": 0.21, "step": 18999 }, { "epoch": 0.21, "learning_rate": 9.816448979591837e-05, "loss": 3.2353, "step": 19000 }, { "FLOPS loss": 0.032165978103876114, "L0_d": 4218.44, "MLM loss": 3.289926528930664, "epoch": 0.22, "step": 19499 }, { "epoch": 0.22, "learning_rate": 9.806244897959184e-05, "loss": 3.2196, "step": 19500 }, { "FLOPS loss": 0.027960574254393578, "L0_d": 3544.02, "MLM loss": 3.489745616912842, "epoch": 0.22, "step": 19999 }, { "epoch": 0.22, "learning_rate": 9.796040816326532e-05, "loss": 3.1959, "step": 20000 }, { "FLOPS loss": 0.028771378099918365, "L0_d": 3797.88, "MLM loss": 3.236794948577881, "epoch": 0.23, "step": 20499 }, { "epoch": 0.23, "learning_rate": 9.785836734693878e-05, "loss": 3.1793, "step": 20500 }, { "FLOPS loss": 0.03428131714463234, "L0_d": 4074.86, "MLM loss": 3.073704242706299, "epoch": 0.23, "step": 20999 }, { "epoch": 0.23, "learning_rate": 9.77565306122449e-05, "loss": 3.1605, "step": 21000 }, { "FLOPS loss": 0.022619003430008888, "L0_d": 2783.59, "MLM loss": 2.9201207160949707, "epoch": 0.24, "step": 21499 }, { "epoch": 0.24, "learning_rate": 9.765448979591837e-05, "loss": 3.1473, "step": 21500 }, { "FLOPS loss": 0.025145038962364197, "L0_d": 2707.75, "MLM loss": 3.186601161956787, "epoch": 0.24, "step": 21999 }, { "epoch": 0.24, "learning_rate": 9.755244897959183e-05, "loss": 3.1237, "step": 22000 }, { "FLOPS loss": 0.03952503949403763, "L0_d": 3897.7, "MLM loss": 3.0274620056152344, "epoch": 0.25, "step": 22499 }, { "epoch": 0.25, "learning_rate": 9.745040816326531e-05, "loss": 3.1115, "step": 22500 }, { "FLOPS loss": 0.03068322129547596, "L0_d": 2821.19, "MLM loss": 3.2197201251983643, "epoch": 0.26, "step": 22999 }, { "epoch": 0.26, "learning_rate": 9.734857142857143e-05, "loss": 3.0942, "step": 23000 }, { "FLOPS loss": 0.023581411689519882, "L0_d": 2503.62, "MLM loss": 3.2678167819976807, "epoch": 0.26, "step": 23499 }, { "epoch": 0.26, "learning_rate": 9.724653061224491e-05, "loss": 3.0783, "step": 23500 }, { "FLOPS loss": 0.040974121540784836, "L0_d": 3216.75, "MLM loss": 3.0708656311035156, "epoch": 0.27, "step": 23999 }, { "epoch": 0.27, "learning_rate": 9.714448979591837e-05, "loss": 3.0668, "step": 24000 }, { "FLOPS loss": 0.033168986439704895, "L0_d": 2725.14, "MLM loss": 2.9523184299468994, "epoch": 0.27, "step": 24499 }, { "epoch": 0.27, "learning_rate": 9.704244897959184e-05, "loss": 3.0547, "step": 24500 }, { "FLOPS loss": 0.027544423937797546, "L0_d": 2328.88, "MLM loss": 2.959336280822754, "epoch": 0.28, "step": 24999 }, { "epoch": 0.28, "learning_rate": 9.694061224489797e-05, "loss": 3.0361, "step": 25000 }, { "FLOPS loss": 0.03576379641890526, "L0_d": 2543.92, "MLM loss": 2.711780309677124, "epoch": 0.28, "step": 25499 }, { "epoch": 0.28, "learning_rate": 9.683857142857144e-05, "loss": 3.0225, "step": 25500 }, { "FLOPS loss": 0.03152213618159294, "L0_d": 2318.56, "MLM loss": 3.0007882118225098, "epoch": 0.29, "step": 25999 }, { "epoch": 0.29, "learning_rate": 9.67365306122449e-05, "loss": 3.0176, "step": 26000 }, { "FLOPS loss": 0.029536589980125427, "L0_d": 2165.81, "MLM loss": 2.842677116394043, "epoch": 0.29, "step": 26499 }, { "epoch": 0.29, "learning_rate": 9.663448979591837e-05, "loss": 3.0005, "step": 26500 }, { "FLOPS loss": 0.036029718816280365, "L0_d": 2718.91, "MLM loss": 2.899057388305664, "epoch": 0.3, "step": 26999 }, { "epoch": 0.3, "learning_rate": 9.65326530612245e-05, "loss": 2.9933, "step": 27000 }, { "FLOPS loss": 0.03214843571186066, "L0_d": 2226.58, "MLM loss": 2.759028911590576, "epoch": 0.31, "step": 27499 }, { "epoch": 0.31, "learning_rate": 9.643061224489796e-05, "loss": 2.9782, "step": 27500 }, { "FLOPS loss": 0.03546053543686867, "L0_d": 2505.81, "MLM loss": 2.997001886367798, "epoch": 0.31, "step": 27999 }, { "epoch": 0.31, "learning_rate": 9.632857142857143e-05, "loss": 2.9699, "step": 28000 }, { "FLOPS loss": 0.0343816913664341, "L0_d": 1888.12, "MLM loss": 2.913430690765381, "epoch": 0.32, "step": 28499 }, { "epoch": 0.32, "learning_rate": 9.622653061224491e-05, "loss": 2.9597, "step": 28500 }, { "FLOPS loss": 0.03750237450003624, "L0_d": 2068.45, "MLM loss": 2.724396228790283, "epoch": 0.32, "step": 28999 }, { "epoch": 0.32, "learning_rate": 9.612469387755101e-05, "loss": 2.9502, "step": 29000 }, { "FLOPS loss": 0.042042121291160583, "L0_d": 2228.12, "MLM loss": 3.0914015769958496, "epoch": 0.33, "step": 29499 }, { "epoch": 0.33, "learning_rate": 9.602265306122449e-05, "loss": 2.9379, "step": 29500 }, { "FLOPS loss": 0.0417197048664093, "L0_d": 2122.09, "MLM loss": 3.036538600921631, "epoch": 0.33, "step": 29999 }, { "epoch": 0.33, "learning_rate": 9.592061224489797e-05, "loss": 2.9336, "step": 30000 }, { "FLOPS loss": 0.03605096787214279, "L0_d": 1888.23, "MLM loss": 2.908878803253174, "epoch": 0.34, "step": 30499 }, { "epoch": 0.34, "learning_rate": 9.581857142857144e-05, "loss": 2.9226, "step": 30500 }, { "FLOPS loss": 0.03627453371882439, "L0_d": 1779.08, "MLM loss": 2.8970131874084473, "epoch": 0.34, "step": 30999 }, { "epoch": 0.34, "learning_rate": 9.571673469387756e-05, "loss": 2.9165, "step": 31000 }, { "FLOPS loss": 0.04076830670237541, "L0_d": 1848.8, "MLM loss": 2.663921356201172, "epoch": 0.35, "step": 31499 }, { "epoch": 0.35, "learning_rate": 9.561469387755102e-05, "loss": 2.9073, "step": 31500 }, { "FLOPS loss": 0.048892099410295486, "L0_d": 2155.09, "MLM loss": 2.8622241020202637, "epoch": 0.36, "step": 31999 }, { "epoch": 0.36, "learning_rate": 9.55126530612245e-05, "loss": 2.8973, "step": 32000 }, { "FLOPS loss": 0.04937506839632988, "L0_d": 2120.45, "MLM loss": 2.768759250640869, "epoch": 0.36, "step": 32499 }, { "epoch": 0.36, "learning_rate": 9.541061224489796e-05, "loss": 2.8914, "step": 32500 }, { "FLOPS loss": 0.045138247311115265, "L0_d": 1764.92, "MLM loss": 2.905224561691284, "epoch": 0.37, "step": 32999 }, { "epoch": 0.37, "learning_rate": 9.530877551020408e-05, "loss": 2.8805, "step": 33000 }, { "FLOPS loss": 0.051294956356287, "L0_d": 1922.39, "MLM loss": 2.623929977416992, "epoch": 0.37, "step": 33499 }, { "epoch": 0.37, "learning_rate": 9.520673469387755e-05, "loss": 2.8801, "step": 33500 }, { "FLOPS loss": 0.038793135434389114, "L0_d": 1495.97, "MLM loss": 2.7240848541259766, "epoch": 0.38, "step": 33999 }, { "epoch": 0.38, "learning_rate": 9.510469387755101e-05, "loss": 2.8755, "step": 34000 }, { "FLOPS loss": 0.05404862388968468, "L0_d": 1781.75, "MLM loss": 3.046609401702881, "epoch": 0.38, "step": 34499 }, { "epoch": 0.38, "learning_rate": 9.500265306122449e-05, "loss": 2.8652, "step": 34500 }, { "FLOPS loss": 0.052048321813344955, "L0_d": 1840.88, "MLM loss": 2.8207156658172607, "epoch": 0.39, "step": 34999 }, { "epoch": 0.39, "learning_rate": 9.490081632653061e-05, "loss": 2.8575, "step": 35000 }, { "FLOPS loss": 0.04301366209983826, "L0_d": 1532.47, "MLM loss": 2.855332374572754, "epoch": 0.39, "step": 35499 }, { "epoch": 0.39, "learning_rate": 9.479877551020409e-05, "loss": 2.852, "step": 35500 }, { "FLOPS loss": 0.05898360535502434, "L0_d": 2083.59, "MLM loss": 2.6988437175750732, "epoch": 0.4, "step": 35999 }, { "epoch": 0.4, "learning_rate": 9.469673469387756e-05, "loss": 2.8495, "step": 36000 }, { "FLOPS loss": 0.04053504019975662, "L0_d": 1359.09, "MLM loss": 2.7535767555236816, "epoch": 0.41, "step": 36499 }, { "epoch": 0.41, "learning_rate": 9.459469387755102e-05, "loss": 2.8378, "step": 36500 }, { "FLOPS loss": 0.057920731604099274, "L0_d": 1736.75, "MLM loss": 2.83315110206604, "epoch": 0.41, "step": 36999 }, { "epoch": 0.41, "learning_rate": 9.449265306122449e-05, "loss": 2.8328, "step": 37000 }, { "FLOPS loss": 0.04034043103456497, "L0_d": 1203.95, "MLM loss": 2.6397976875305176, "epoch": 0.42, "step": 37499 }, { "epoch": 0.42, "learning_rate": 9.439081632653062e-05, "loss": 2.8324, "step": 37500 }, { "FLOPS loss": 0.05750465765595436, "L0_d": 1703.27, "MLM loss": 2.837484121322632, "epoch": 0.42, "step": 37999 }, { "epoch": 0.42, "learning_rate": 9.428877551020408e-05, "loss": 2.8211, "step": 38000 }, { "FLOPS loss": 0.046685557812452316, "L0_d": 1341.02, "MLM loss": 2.6179981231689453, "epoch": 0.43, "step": 38499 }, { "epoch": 0.43, "learning_rate": 9.418673469387755e-05, "loss": 2.8199, "step": 38500 }, { "FLOPS loss": 0.050804365426301956, "L0_d": 1393.34, "MLM loss": 2.6517767906188965, "epoch": 0.43, "step": 38999 }, { "epoch": 0.43, "learning_rate": 9.408469387755103e-05, "loss": 2.8119, "step": 39000 }, { "FLOPS loss": 0.052113354206085205, "L0_d": 1231.92, "MLM loss": 2.6869778633117676, "epoch": 0.44, "step": 39499 }, { "epoch": 0.44, "learning_rate": 9.398285714285715e-05, "loss": 2.8039, "step": 39500 }, { "FLOPS loss": 0.05032088980078697, "L0_d": 1221.75, "MLM loss": 2.7069854736328125, "epoch": 0.44, "step": 39999 }, { "epoch": 0.44, "learning_rate": 9.388081632653061e-05, "loss": 2.8013, "step": 40000 }, { "FLOPS loss": 0.058007560670375824, "L0_d": 1402.47, "MLM loss": 2.8247499465942383, "epoch": 0.45, "step": 40499 }, { "epoch": 0.45, "learning_rate": 9.377877551020409e-05, "loss": 2.8023, "step": 40500 }, { "FLOPS loss": 0.06101697310805321, "L0_d": 1349.58, "MLM loss": 2.6886024475097656, "epoch": 0.45, "step": 40999 }, { "epoch": 0.46, "learning_rate": 9.367673469387756e-05, "loss": 2.7899, "step": 41000 }, { "FLOPS loss": 0.05123594030737877, "L0_d": 1264.33, "MLM loss": 2.824205160140991, "epoch": 0.46, "step": 41499 }, { "epoch": 0.46, "learning_rate": 9.357469387755102e-05, "loss": 2.7872, "step": 41500 }, { "FLOPS loss": 0.05108135938644409, "L0_d": 1238.2, "MLM loss": 2.7967891693115234, "epoch": 0.47, "step": 41999 }, { "epoch": 0.47, "learning_rate": 9.347285714285715e-05, "loss": 2.7814, "step": 42000 }, { "FLOPS loss": 0.048679012805223465, "L0_d": 1117.16, "MLM loss": 2.777163028717041, "epoch": 0.47, "step": 42499 }, { "epoch": 0.47, "learning_rate": 9.337081632653062e-05, "loss": 2.7766, "step": 42500 }, { "FLOPS loss": 0.06896565109491348, "L0_d": 1246.2, "MLM loss": 2.5713467597961426, "epoch": 0.48, "step": 42999 }, { "epoch": 0.48, "learning_rate": 9.326877551020408e-05, "loss": 2.7705, "step": 43000 }, { "FLOPS loss": 0.056604623794555664, "L0_d": 1198.89, "MLM loss": 2.669093608856201, "epoch": 0.48, "step": 43499 }, { "epoch": 0.48, "learning_rate": 9.316673469387755e-05, "loss": 2.7662, "step": 43500 }, { "FLOPS loss": 0.0550491139292717, "L0_d": 1054.58, "MLM loss": 2.6673429012298584, "epoch": 0.49, "step": 43999 }, { "epoch": 0.49, "learning_rate": 9.306469387755103e-05, "loss": 2.7649, "step": 44000 }, { "FLOPS loss": 0.058714158833026886, "L0_d": 1122.97, "MLM loss": 2.5899200439453125, "epoch": 0.49, "step": 44499 }, { "epoch": 0.49, "learning_rate": 9.296285714285715e-05, "loss": 2.7592, "step": 44500 }, { "FLOPS loss": 0.055097758769989014, "L0_d": 1044.19, "MLM loss": 2.683788776397705, "epoch": 0.5, "step": 44999 }, { "epoch": 0.5, "learning_rate": 9.286081632653063e-05, "loss": 2.7603, "step": 45000 }, { "FLOPS loss": 0.04979957267642021, "L0_d": 1122.89, "MLM loss": 2.7051808834075928, "epoch": 0.5, "step": 45499 }, { "epoch": 0.5, "learning_rate": 9.275877551020409e-05, "loss": 2.7522, "step": 45500 }, { "FLOPS loss": 0.06374557316303253, "L0_d": 1301.67, "MLM loss": 2.608733654022217, "epoch": 0.51, "step": 45999 }, { "epoch": 0.51, "learning_rate": 9.265673469387756e-05, "loss": 2.7519, "step": 46000 }, { "FLOPS loss": 0.0741661861538887, "L0_d": 1349.66, "MLM loss": 2.775351047515869, "epoch": 0.52, "step": 46499 }, { "epoch": 0.52, "learning_rate": 9.255489795918368e-05, "loss": 2.7494, "step": 46500 }, { "FLOPS loss": 0.06687603890895844, "L0_d": 1159.81, "MLM loss": 2.64093279838562, "epoch": 0.52, "step": 46999 }, { "epoch": 0.52, "learning_rate": 9.245285714285715e-05, "loss": 2.7441, "step": 47000 }, { "FLOPS loss": 0.04636060819029808, "L0_d": 935.94, "MLM loss": 2.689058780670166, "epoch": 0.53, "step": 47499 }, { "epoch": 0.53, "learning_rate": 9.235081632653062e-05, "loss": 2.7358, "step": 47500 }, { "FLOPS loss": 0.060041069984436035, "L0_d": 1225.31, "MLM loss": 2.7171616554260254, "epoch": 0.53, "step": 47999 }, { "epoch": 0.53, "learning_rate": 9.224897959183674e-05, "loss": 2.7388, "step": 48000 }, { "FLOPS loss": 0.06630845367908478, "L0_d": 1241.45, "MLM loss": 2.7456371784210205, "epoch": 0.54, "step": 48499 }, { "epoch": 0.54, "learning_rate": 9.21469387755102e-05, "loss": 2.737, "step": 48500 }, { "FLOPS loss": 0.046601079404354095, "L0_d": 729.88, "MLM loss": 2.6988675594329834, "epoch": 0.54, "step": 48999 }, { "epoch": 0.54, "learning_rate": 9.204489795918367e-05, "loss": 2.7325, "step": 49000 }, { "FLOPS loss": 0.07494211941957474, "L0_d": 1193.12, "MLM loss": 2.5719823837280273, "epoch": 0.55, "step": 49499 }, { "epoch": 0.55, "learning_rate": 9.194285714285715e-05, "loss": 2.7286, "step": 49500 }, { "FLOPS loss": 0.07081770151853561, "L0_d": 1134.78, "MLM loss": 2.7835943698883057, "epoch": 0.55, "step": 49999 }, { "epoch": 0.55, "learning_rate": 9.184081632653063e-05, "loss": 2.72, "step": 50000 }, { "FLOPS loss": 0.06638479977846146, "L0_d": 1167.83, "MLM loss": 2.8045172691345215, "epoch": 0.56, "step": 50499 }, { "epoch": 0.56, "learning_rate": 9.173877551020409e-05, "loss": 2.7153, "step": 50500 }, { "FLOPS loss": 0.045257825404405594, "L0_d": 739.2, "MLM loss": 2.6090309619903564, "epoch": 0.57, "step": 50999 }, { "epoch": 0.57, "learning_rate": 9.163673469387755e-05, "loss": 2.7174, "step": 51000 }, { "FLOPS loss": 0.054895590990781784, "L0_d": 1111.22, "MLM loss": 2.3793227672576904, "epoch": 0.57, "step": 51499 }, { "epoch": 0.57, "learning_rate": 9.153469387755102e-05, "loss": 2.7094, "step": 51500 }, { "FLOPS loss": 0.06622140854597092, "L0_d": 1020.75, "MLM loss": 2.778942584991455, "epoch": 0.58, "step": 51999 }, { "epoch": 0.58, "learning_rate": 9.143285714285714e-05, "loss": 2.7094, "step": 52000 }, { "FLOPS loss": 0.05828952416777611, "L0_d": 1143.81, "MLM loss": 2.644211769104004, "epoch": 0.58, "step": 52499 }, { "epoch": 0.58, "learning_rate": 9.133081632653062e-05, "loss": 2.6987, "step": 52500 }, { "FLOPS loss": 0.054243333637714386, "L0_d": 863.03, "MLM loss": 2.529053211212158, "epoch": 0.59, "step": 52999 }, { "epoch": 0.59, "learning_rate": 9.122877551020408e-05, "loss": 2.6991, "step": 53000 }, { "FLOPS loss": 0.052916064858436584, "L0_d": 1004.09, "MLM loss": 2.9836606979370117, "epoch": 0.59, "step": 53499 }, { "epoch": 0.59, "learning_rate": 9.112673469387756e-05, "loss": 2.6972, "step": 53500 }, { "FLOPS loss": 0.05424470081925392, "L0_d": 938.41, "MLM loss": 2.6670989990234375, "epoch": 0.6, "step": 53999 }, { "epoch": 0.6, "learning_rate": 9.102489795918367e-05, "loss": 2.6968, "step": 54000 }, { "FLOPS loss": 0.06169683113694191, "L0_d": 1093.36, "MLM loss": 2.836012840270996, "epoch": 0.6, "step": 54499 }, { "epoch": 0.6, "learning_rate": 9.092285714285715e-05, "loss": 2.6927, "step": 54500 }, { "FLOPS loss": 0.0750756487250328, "L0_d": 1154.27, "MLM loss": 2.5062623023986816, "epoch": 0.61, "step": 54999 }, { "epoch": 0.61, "learning_rate": 9.082081632653061e-05, "loss": 2.6892, "step": 55000 }, { "FLOPS loss": 0.06515518575906754, "L0_d": 1043.03, "MLM loss": 2.7674970626831055, "epoch": 0.62, "step": 55499 }, { "epoch": 0.62, "learning_rate": 9.071877551020409e-05, "loss": 2.6762, "step": 55500 }, { "FLOPS loss": 0.07003561407327652, "L0_d": 1195.55, "MLM loss": 2.6636061668395996, "epoch": 0.62, "step": 55999 }, { "epoch": 0.62, "learning_rate": 9.061673469387755e-05, "loss": 2.6714, "step": 56000 }, { "FLOPS loss": 0.07769113034009933, "L0_d": 1067.41, "MLM loss": 2.6089301109313965, "epoch": 0.63, "step": 56499 }, { "epoch": 0.63, "learning_rate": 9.051489795918368e-05, "loss": 2.6694, "step": 56500 }, { "FLOPS loss": 0.04882253333926201, "L0_d": 756.2, "MLM loss": 2.5227391719818115, "epoch": 0.63, "step": 56999 }, { "epoch": 0.63, "learning_rate": 9.041285714285714e-05, "loss": 2.6694, "step": 57000 }, { "FLOPS loss": 0.0605013407766819, "L0_d": 1153.06, "MLM loss": 2.697270393371582, "epoch": 0.64, "step": 57499 }, { "epoch": 0.64, "learning_rate": 9.031081632653062e-05, "loss": 2.6673, "step": 57500 }, { "FLOPS loss": 0.059701476246118546, "L0_d": 843.02, "MLM loss": 2.5305943489074707, "epoch": 0.64, "step": 57999 }, { "epoch": 0.64, "learning_rate": 9.02087755102041e-05, "loss": 2.6601, "step": 58000 }, { "FLOPS loss": 0.054951492697000504, "L0_d": 910.19, "MLM loss": 2.6635236740112305, "epoch": 0.65, "step": 58499 }, { "epoch": 0.65, "learning_rate": 9.01069387755102e-05, "loss": 2.6573, "step": 58500 }, { "FLOPS loss": 0.06101666018366814, "L0_d": 1046.78, "MLM loss": 2.7258644104003906, "epoch": 0.65, "step": 58999 }, { "epoch": 0.65, "learning_rate": 9.000489795918368e-05, "loss": 2.6562, "step": 59000 }, { "FLOPS loss": 0.05804910883307457, "L0_d": 905.23, "MLM loss": 2.5663816928863525, "epoch": 0.66, "step": 59499 }, { "epoch": 0.66, "learning_rate": 8.990285714285715e-05, "loss": 2.6554, "step": 59500 }, { "FLOPS loss": 0.06257927417755127, "L0_d": 1046.14, "MLM loss": 2.633559226989746, "epoch": 0.67, "step": 59999 }, { "epoch": 0.67, "learning_rate": 8.980081632653061e-05, "loss": 2.6472, "step": 60000 }, { "FLOPS loss": 0.0484636053442955, "L0_d": 865.48, "MLM loss": 2.699965000152588, "epoch": 0.67, "step": 60499 }, { "epoch": 0.67, "learning_rate": 8.969897959183675e-05, "loss": 2.6469, "step": 60500 }, { "FLOPS loss": 0.060321953147649765, "L0_d": 826.08, "MLM loss": 2.4169182777404785, "epoch": 0.68, "step": 60999 }, { "epoch": 0.68, "learning_rate": 8.959693877551021e-05, "loss": 2.6458, "step": 61000 }, { "FLOPS loss": 0.062278661876916885, "L0_d": 838.16, "MLM loss": 2.5595428943634033, "epoch": 0.68, "step": 61499 }, { "epoch": 0.68, "learning_rate": 8.949489795918367e-05, "loss": 2.6358, "step": 61500 }, { "FLOPS loss": 0.05148935317993164, "L0_d": 821.52, "MLM loss": 2.4650089740753174, "epoch": 0.69, "step": 61999 }, { "epoch": 0.69, "learning_rate": 8.939285714285714e-05, "loss": 2.6412, "step": 62000 }, { "FLOPS loss": 0.04998023435473442, "L0_d": 969.06, "MLM loss": 2.7552490234375, "epoch": 0.69, "step": 62499 }, { "epoch": 0.69, "learning_rate": 8.929102040816327e-05, "loss": 2.636, "step": 62500 }, { "FLOPS loss": 0.04964654892683029, "L0_d": 875.78, "MLM loss": 2.7296853065490723, "epoch": 0.7, "step": 62999 }, { "epoch": 0.7, "learning_rate": 8.918897959183674e-05, "loss": 2.6324, "step": 63000 }, { "FLOPS loss": 0.057113952934741974, "L0_d": 879.17, "MLM loss": 2.6276140213012695, "epoch": 0.7, "step": 63499 }, { "epoch": 0.7, "learning_rate": 8.90869387755102e-05, "loss": 2.6294, "step": 63500 }, { "FLOPS loss": 0.0638364925980568, "L0_d": 958.55, "MLM loss": 2.541940450668335, "epoch": 0.71, "step": 63999 }, { "epoch": 0.71, "learning_rate": 8.898489795918368e-05, "loss": 2.6225, "step": 64000 }, { "FLOPS loss": 0.08000694215297699, "L0_d": 1295.39, "MLM loss": 2.740926504135132, "epoch": 0.72, "step": 64499 }, { "epoch": 0.72, "learning_rate": 8.88830612244898e-05, "loss": 2.623, "step": 64500 }, { "FLOPS loss": 0.05877278372645378, "L0_d": 851.88, "MLM loss": 2.487786054611206, "epoch": 0.72, "step": 64999 }, { "epoch": 0.72, "learning_rate": 8.878102040816327e-05, "loss": 2.6155, "step": 65000 }, { "FLOPS loss": 0.06732272356748581, "L0_d": 1071.2, "MLM loss": 2.7441048622131348, "epoch": 0.73, "step": 65499 }, { "epoch": 0.73, "learning_rate": 8.867897959183675e-05, "loss": 2.6137, "step": 65500 }, { "FLOPS loss": 0.0558248832821846, "L0_d": 965.09, "MLM loss": 2.576885938644409, "epoch": 0.73, "step": 65999 }, { "epoch": 0.73, "learning_rate": 8.857693877551021e-05, "loss": 2.6125, "step": 66000 }, { "FLOPS loss": 0.06944676488637924, "L0_d": 1012.27, "MLM loss": 2.398052453994751, "epoch": 0.74, "step": 66499 }, { "epoch": 0.74, "learning_rate": 8.847510204081633e-05, "loss": 2.6131, "step": 66500 }, { "FLOPS loss": 0.04680348187685013, "L0_d": 771.84, "MLM loss": 2.7348499298095703, "epoch": 0.74, "step": 66999 }, { "epoch": 0.74, "learning_rate": 8.83730612244898e-05, "loss": 2.607, "step": 67000 }, { "FLOPS loss": 0.06255348771810532, "L0_d": 1090.86, "MLM loss": 2.5733184814453125, "epoch": 0.75, "step": 67499 }, { "epoch": 0.75, "learning_rate": 8.827102040816327e-05, "loss": 2.609, "step": 67500 }, { "FLOPS loss": 0.04876190051436424, "L0_d": 912.47, "MLM loss": 2.5150537490844727, "epoch": 0.75, "step": 67999 }, { "epoch": 0.75, "learning_rate": 8.816897959183674e-05, "loss": 2.6046, "step": 68000 }, { "FLOPS loss": 0.060514744371175766, "L0_d": 825.95, "MLM loss": 2.3934388160705566, "epoch": 0.76, "step": 68499 }, { "epoch": 0.76, "learning_rate": 8.806693877551022e-05, "loss": 2.6011, "step": 68500 }, { "FLOPS loss": 0.06146342679858208, "L0_d": 866.45, "MLM loss": 2.349757432937622, "epoch": 0.77, "step": 68999 }, { "epoch": 0.77, "learning_rate": 8.796489795918368e-05, "loss": 2.5945, "step": 69000 }, { "FLOPS loss": 0.06182211637496948, "L0_d": 1050.36, "MLM loss": 2.676337957382202, "epoch": 0.77, "step": 69499 }, { "epoch": 0.77, "learning_rate": 8.786285714285715e-05, "loss": 2.5978, "step": 69500 }, { "FLOPS loss": 0.060056786984205246, "L0_d": 1027.73, "MLM loss": 2.4919960498809814, "epoch": 0.78, "step": 69999 }, { "epoch": 0.78, "learning_rate": 8.776081632653061e-05, "loss": 2.5925, "step": 70000 }, { "FLOPS loss": 0.05691603943705559, "L0_d": 1005.89, "MLM loss": 2.491795539855957, "epoch": 0.78, "step": 70499 }, { "epoch": 0.78, "learning_rate": 8.765897959183674e-05, "loss": 2.5934, "step": 70500 }, { "FLOPS loss": 0.04397211968898773, "L0_d": 890.69, "MLM loss": 2.4429917335510254, "epoch": 0.79, "step": 70999 }, { "epoch": 0.79, "learning_rate": 8.755693877551021e-05, "loss": 2.5945, "step": 71000 }, { "FLOPS loss": 0.06304601579904556, "L0_d": 1083.02, "MLM loss": 2.5249195098876953, "epoch": 0.79, "step": 71499 }, { "epoch": 0.79, "learning_rate": 8.745489795918367e-05, "loss": 2.5856, "step": 71500 }, { "FLOPS loss": 0.08187592774629593, "L0_d": 1269.14, "MLM loss": 2.5971012115478516, "epoch": 0.8, "step": 71999 }, { "epoch": 0.8, "learning_rate": 8.735285714285715e-05, "loss": 2.5822, "step": 72000 }, { "FLOPS loss": 0.06232510879635811, "L0_d": 1257.69, "MLM loss": 2.3417344093322754, "epoch": 0.8, "step": 72499 }, { "epoch": 0.8, "learning_rate": 8.725102040816326e-05, "loss": 2.5791, "step": 72500 }, { "FLOPS loss": 0.06792527437210083, "L0_d": 1044.23, "MLM loss": 2.5267670154571533, "epoch": 0.81, "step": 72999 }, { "epoch": 0.81, "learning_rate": 8.714897959183674e-05, "loss": 2.5755, "step": 73000 }, { "FLOPS loss": 0.04973457008600235, "L0_d": 750.81, "MLM loss": 2.533811330795288, "epoch": 0.82, "step": 73499 }, { "epoch": 0.82, "learning_rate": 8.704693877551022e-05, "loss": 2.5761, "step": 73500 }, { "FLOPS loss": 0.06060154736042023, "L0_d": 1078.53, "MLM loss": 2.675647020339966, "epoch": 0.82, "step": 73999 }, { "epoch": 0.82, "learning_rate": 8.694489795918368e-05, "loss": 2.5707, "step": 74000 }, { "FLOPS loss": 0.053472839295864105, "L0_d": 922.08, "MLM loss": 2.2746894359588623, "epoch": 0.83, "step": 74499 }, { "epoch": 0.83, "learning_rate": 8.68430612244898e-05, "loss": 2.5693, "step": 74500 }, { "FLOPS loss": 0.05671130120754242, "L0_d": 977.31, "MLM loss": 2.4245667457580566, "epoch": 0.83, "step": 74999 }, { "epoch": 0.83, "learning_rate": 8.674102040816327e-05, "loss": 2.5695, "step": 75000 }, { "FLOPS loss": 0.05780375748872757, "L0_d": 996.98, "MLM loss": 2.4184908866882324, "epoch": 0.84, "step": 75499 }, { "epoch": 0.84, "learning_rate": 8.663897959183674e-05, "loss": 2.5619, "step": 75500 }, { "FLOPS loss": 0.04598892107605934, "L0_d": 812.52, "MLM loss": 2.5372962951660156, "epoch": 0.84, "step": 75999 }, { "epoch": 0.84, "learning_rate": 8.653693877551021e-05, "loss": 2.5612, "step": 76000 }, { "FLOPS loss": 0.0738496482372284, "L0_d": 1309.52, "MLM loss": 2.6247034072875977, "epoch": 0.85, "step": 76499 }, { "epoch": 0.85, "learning_rate": 8.643510204081633e-05, "loss": 2.5625, "step": 76500 }, { "FLOPS loss": 0.06627869606018066, "L0_d": 1180.94, "MLM loss": 2.5750892162323, "epoch": 0.85, "step": 76999 }, { "epoch": 0.85, "learning_rate": 8.63330612244898e-05, "loss": 2.5608, "step": 77000 }, { "FLOPS loss": 0.051825810223817825, "L0_d": 960.0, "MLM loss": 2.6272084712982178, "epoch": 0.86, "step": 77499 }, { "epoch": 0.86, "learning_rate": 8.623102040816326e-05, "loss": 2.5571, "step": 77500 }, { "FLOPS loss": 0.0571591779589653, "L0_d": 975.31, "MLM loss": 2.584547281265259, "epoch": 0.87, "step": 77999 }, { "epoch": 0.87, "learning_rate": 8.61291836734694e-05, "loss": 2.5529, "step": 78000 }, { "FLOPS loss": 0.05316011235117912, "L0_d": 743.8, "MLM loss": 2.3314132690429688, "epoch": 0.87, "step": 78499 }, { "epoch": 0.87, "learning_rate": 8.602714285714286e-05, "loss": 2.5566, "step": 78500 }, { "FLOPS loss": 0.04259267449378967, "L0_d": 841.89, "MLM loss": 2.4888720512390137, "epoch": 0.88, "step": 78999 }, { "epoch": 0.88, "learning_rate": 8.592510204081634e-05, "loss": 2.5516, "step": 79000 }, { "FLOPS loss": 0.06571514904499054, "L0_d": 1007.47, "MLM loss": 2.749783992767334, "epoch": 0.88, "step": 79499 }, { "epoch": 0.88, "learning_rate": 8.58230612244898e-05, "loss": 2.5517, "step": 79500 }, { "FLOPS loss": 0.060765206813812256, "L0_d": 891.48, "MLM loss": 2.3291566371917725, "epoch": 0.89, "step": 79999 }, { "epoch": 0.89, "learning_rate": 8.572122448979592e-05, "loss": 2.5482, "step": 80000 }, { "FLOPS loss": 0.05448305979371071, "L0_d": 850.91, "MLM loss": 2.6289730072021484, "epoch": 0.89, "step": 80499 }, { "epoch": 0.89, "learning_rate": 8.56191836734694e-05, "loss": 2.5439, "step": 80500 }, { "FLOPS loss": 0.07561685889959335, "L0_d": 1067.91, "MLM loss": 2.4807238578796387, "epoch": 0.9, "step": 80999 }, { "epoch": 0.9, "learning_rate": 8.551714285714286e-05, "loss": 2.5415, "step": 81000 }, { "FLOPS loss": 0.0704834833741188, "L0_d": 985.7, "MLM loss": 2.625803232192993, "epoch": 0.9, "step": 81499 }, { "epoch": 0.9, "learning_rate": 8.541510204081633e-05, "loss": 2.5447, "step": 81500 }, { "FLOPS loss": 0.05405523627996445, "L0_d": 879.75, "MLM loss": 2.4514644145965576, "epoch": 0.91, "step": 81999 }, { "epoch": 0.91, "learning_rate": 8.531326530612246e-05, "loss": 2.5372, "step": 82000 }, { "FLOPS loss": 0.06355171650648117, "L0_d": 1124.23, "MLM loss": 2.7510833740234375, "epoch": 0.92, "step": 82499 }, { "epoch": 0.92, "learning_rate": 8.521122448979593e-05, "loss": 2.5327, "step": 82500 }, { "FLOPS loss": 0.05904227867722511, "L0_d": 870.48, "MLM loss": 2.5004591941833496, "epoch": 0.92, "step": 82999 }, { "epoch": 0.92, "learning_rate": 8.510918367346939e-05, "loss": 2.5316, "step": 83000 }, { "FLOPS loss": 0.03980398550629616, "L0_d": 750.81, "MLM loss": 2.4699714183807373, "epoch": 0.93, "step": 83499 }, { "epoch": 0.93, "learning_rate": 8.500714285714286e-05, "loss": 2.5285, "step": 83500 }, { "FLOPS loss": 0.06711471080780029, "L0_d": 949.61, "MLM loss": 2.3448944091796875, "epoch": 0.93, "step": 83999 }, { "epoch": 0.93, "learning_rate": 8.490510204081634e-05, "loss": 2.5311, "step": 84000 }, { "FLOPS loss": 0.05809421092271805, "L0_d": 975.31, "MLM loss": 2.487459182739258, "epoch": 0.94, "step": 84499 }, { "epoch": 0.94, "learning_rate": 8.48030612244898e-05, "loss": 2.5298, "step": 84500 }, { "FLOPS loss": 0.04452372342348099, "L0_d": 811.64, "MLM loss": 2.4875330924987793, "epoch": 0.94, "step": 84999 }, { "epoch": 0.94, "learning_rate": 8.470102040816327e-05, "loss": 2.5255, "step": 85000 }, { "FLOPS loss": 0.05485633388161659, "L0_d": 948.16, "MLM loss": 2.5577940940856934, "epoch": 0.95, "step": 85499 }, { "epoch": 0.95, "learning_rate": 8.459897959183673e-05, "loss": 2.5283, "step": 85500 }, { "FLOPS loss": 0.0828530341386795, "L0_d": 1407.45, "MLM loss": 2.59635066986084, "epoch": 0.95, "step": 85999 }, { "epoch": 0.95, "learning_rate": 8.449714285714286e-05, "loss": 2.5265, "step": 86000 }, { "FLOPS loss": 0.047432463616132736, "L0_d": 879.78, "MLM loss": 2.6348631381988525, "epoch": 0.96, "step": 86499 }, { "epoch": 0.96, "learning_rate": 8.439510204081633e-05, "loss": 2.5201, "step": 86500 }, { "FLOPS loss": 0.06606831401586533, "L0_d": 1156.88, "MLM loss": 2.4339711666107178, "epoch": 0.97, "step": 86999 }, { "epoch": 0.97, "learning_rate": 8.42930612244898e-05, "loss": 2.5181, "step": 87000 }, { "FLOPS loss": 0.08546411991119385, "L0_d": 1172.02, "MLM loss": 2.4836373329162598, "epoch": 0.97, "step": 87499 }, { "epoch": 0.97, "learning_rate": 8.419102040816327e-05, "loss": 2.5226, "step": 87500 }, { "FLOPS loss": 0.06991736590862274, "L0_d": 1059.84, "MLM loss": 2.4597690105438232, "epoch": 0.98, "step": 87999 }, { "epoch": 0.98, "learning_rate": 8.408897959183674e-05, "loss": 2.5165, "step": 88000 }, { "FLOPS loss": 0.05534587427973747, "L0_d": 1017.25, "MLM loss": 2.4961466789245605, "epoch": 0.98, "step": 88499 }, { "epoch": 0.98, "learning_rate": 8.398714285714287e-05, "loss": 2.5117, "step": 88500 }, { "FLOPS loss": 0.0569668672978878, "L0_d": 903.27, "MLM loss": 2.3658652305603027, "epoch": 0.99, "step": 88999 }, { "epoch": 0.99, "learning_rate": 8.388510204081634e-05, "loss": 2.5162, "step": 89000 }, { "FLOPS loss": 0.058121200650930405, "L0_d": 904.06, "MLM loss": 2.4028067588806152, "epoch": 0.99, "step": 89499 }, { "epoch": 0.99, "learning_rate": 8.37830612244898e-05, "loss": 2.5091, "step": 89500 }, { "FLOPS loss": 0.050532229244709015, "L0_d": 943.59, "MLM loss": 2.528367757797241, "epoch": 1.0, "step": 89999 }, { "epoch": 1.0, "learning_rate": 8.368102040816326e-05, "loss": 2.5078, "step": 90000 }, { "FLOPS loss": 0.053834713995456696, "L0_d": 863.38, "MLM loss": 2.2239866256713867, "epoch": 1.0, "step": 90499 }, { "epoch": 1.0, "learning_rate": 8.357897959183674e-05, "loss": 2.4994, "step": 90500 }, { "FLOPS loss": 0.05430047586560249, "L0_d": 962.91, "MLM loss": 2.4747774600982666, "epoch": 1.01, "step": 90999 }, { "epoch": 1.01, "learning_rate": 8.347693877551021e-05, "loss": 2.5026, "step": 91000 }, { "FLOPS loss": 0.07724824547767639, "L0_d": 1078.22, "MLM loss": 2.3891727924346924, "epoch": 1.02, "step": 91499 }, { "epoch": 1.02, "learning_rate": 8.337489795918367e-05, "loss": 2.506, "step": 91500 }, { "FLOPS loss": 0.06916126608848572, "L0_d": 1142.8, "MLM loss": 2.4653806686401367, "epoch": 1.02, "step": 91999 }, { "epoch": 1.02, "learning_rate": 8.32730612244898e-05, "loss": 2.4983, "step": 92000 }, { "FLOPS loss": 0.0642898678779602, "L0_d": 1087.31, "MLM loss": 2.3559579849243164, "epoch": 1.03, "step": 92499 }, { "epoch": 1.03, "learning_rate": 8.317102040816327e-05, "loss": 2.4967, "step": 92500 }, { "FLOPS loss": 0.05998803675174713, "L0_d": 1063.61, "MLM loss": 2.431797742843628, "epoch": 1.03, "step": 92999 }, { "epoch": 1.03, "learning_rate": 8.306897959183674e-05, "loss": 2.4966, "step": 93000 }, { "FLOPS loss": 0.05855776369571686, "L0_d": 1091.62, "MLM loss": 2.2639646530151367, "epoch": 1.04, "step": 93499 }, { "epoch": 1.04, "learning_rate": 8.29669387755102e-05, "loss": 2.4966, "step": 93500 }, { "FLOPS loss": 0.06000414863228798, "L0_d": 1061.3, "MLM loss": 2.525177001953125, "epoch": 1.04, "step": 93999 }, { "epoch": 1.04, "learning_rate": 8.286489795918368e-05, "loss": 2.492, "step": 94000 }, { "FLOPS loss": 0.0527518056333065, "L0_d": 749.3, "MLM loss": 2.2438971996307373, "epoch": 1.05, "step": 94499 }, { "epoch": 1.05, "learning_rate": 8.27630612244898e-05, "loss": 2.4884, "step": 94500 }, { "FLOPS loss": 0.06188157945871353, "L0_d": 1051.8, "MLM loss": 2.462313175201416, "epoch": 1.05, "step": 94999 }, { "epoch": 1.05, "learning_rate": 8.266102040816326e-05, "loss": 2.4932, "step": 95000 }, { "FLOPS loss": 0.07006128877401352, "L0_d": 870.84, "MLM loss": 2.391632080078125, "epoch": 1.06, "step": 95499 }, { "epoch": 1.06, "learning_rate": 8.255897959183674e-05, "loss": 2.4913, "step": 95500 }, { "FLOPS loss": 0.06805186718702316, "L0_d": 1000.28, "MLM loss": 2.424804210662842, "epoch": 1.07, "step": 95999 }, { "epoch": 1.07, "learning_rate": 8.245693877551021e-05, "loss": 2.4825, "step": 96000 }, { "FLOPS loss": 0.05748196691274643, "L0_d": 868.95, "MLM loss": 2.501098871231079, "epoch": 1.07, "step": 96499 }, { "epoch": 1.07, "learning_rate": 8.235489795918367e-05, "loss": 2.4871, "step": 96500 }, { "FLOPS loss": 0.04384532943367958, "L0_d": 744.17, "MLM loss": 2.475280284881592, "epoch": 1.08, "step": 96999 }, { "epoch": 1.08, "learning_rate": 8.22530612244898e-05, "loss": 2.4829, "step": 97000 }, { "FLOPS loss": 0.07153061032295227, "L0_d": 1052.89, "MLM loss": 2.5760295391082764, "epoch": 1.08, "step": 97499 }, { "epoch": 1.08, "learning_rate": 8.215102040816327e-05, "loss": 2.4865, "step": 97500 }, { "FLOPS loss": 0.0507240891456604, "L0_d": 716.66, "MLM loss": 2.4014198780059814, "epoch": 1.09, "step": 97999 }, { "epoch": 1.09, "learning_rate": 8.204897959183674e-05, "loss": 2.481, "step": 98000 }, { "FLOPS loss": 0.06032687798142433, "L0_d": 955.14, "MLM loss": 2.372854232788086, "epoch": 1.09, "step": 98499 }, { "epoch": 1.09, "learning_rate": 8.19469387755102e-05, "loss": 2.4837, "step": 98500 }, { "FLOPS loss": 0.06308581680059433, "L0_d": 1145.91, "MLM loss": 2.506711483001709, "epoch": 1.1, "step": 98999 }, { "epoch": 1.1, "learning_rate": 8.184510204081633e-05, "loss": 2.4829, "step": 99000 }, { "FLOPS loss": 0.05042247101664543, "L0_d": 831.45, "MLM loss": 2.501941442489624, "epoch": 1.1, "step": 99499 }, { "epoch": 1.1, "learning_rate": 8.17430612244898e-05, "loss": 2.4767, "step": 99500 }, { "FLOPS loss": 0.06764364242553711, "L0_d": 1286.0, "MLM loss": 2.3605010509490967, "epoch": 1.11, "step": 99999 }, { "epoch": 1.11, "learning_rate": 8.164102040816328e-05, "loss": 2.479, "step": 100000 }, { "FLOPS loss": 0.0610102042555809, "L0_d": 865.62, "MLM loss": 2.458651542663574, "epoch": 1.12, "step": 100499 }, { "epoch": 1.12, "learning_rate": 8.153897959183674e-05, "loss": 2.4756, "step": 100500 }, { "FLOPS loss": 0.06401363015174866, "L0_d": 1166.59, "MLM loss": 2.4608983993530273, "epoch": 1.12, "step": 100999 }, { "epoch": 1.12, "learning_rate": 8.143693877551021e-05, "loss": 2.4727, "step": 101000 }, { "FLOPS loss": 0.04460912197828293, "L0_d": 850.44, "MLM loss": 2.3825573921203613, "epoch": 1.13, "step": 101499 }, { "epoch": 1.13, "learning_rate": 8.133489795918367e-05, "loss": 2.4733, "step": 101500 }, { "FLOPS loss": 0.0666898861527443, "L0_d": 1252.23, "MLM loss": 2.354072332382202, "epoch": 1.13, "step": 101999 }, { "epoch": 1.13, "learning_rate": 8.12330612244898e-05, "loss": 2.4747, "step": 102000 }, { "FLOPS loss": 0.04896422475576401, "L0_d": 821.73, "MLM loss": 2.2765748500823975, "epoch": 1.14, "step": 102499 }, { "epoch": 1.14, "learning_rate": 8.113102040816327e-05, "loss": 2.4651, "step": 102500 }, { "FLOPS loss": 0.05629485473036766, "L0_d": 1071.08, "MLM loss": 2.3546361923217773, "epoch": 1.14, "step": 102999 }, { "epoch": 1.14, "learning_rate": 8.102897959183674e-05, "loss": 2.4688, "step": 103000 }, { "FLOPS loss": 0.0577983632683754, "L0_d": 938.2, "MLM loss": 2.3472390174865723, "epoch": 1.15, "step": 103499 }, { "epoch": 1.15, "learning_rate": 8.092693877551021e-05, "loss": 2.4674, "step": 103500 }, { "FLOPS loss": 0.0506107471883297, "L0_d": 840.83, "MLM loss": 2.500201940536499, "epoch": 1.15, "step": 103999 }, { "epoch": 1.15, "learning_rate": 8.082510204081632e-05, "loss": 2.4658, "step": 104000 }, { "FLOPS loss": 0.05707992985844612, "L0_d": 806.86, "MLM loss": 2.604954242706299, "epoch": 1.16, "step": 104499 }, { "epoch": 1.16, "learning_rate": 8.07230612244898e-05, "loss": 2.4642, "step": 104500 }, { "FLOPS loss": 0.06948617100715637, "L0_d": 1061.86, "MLM loss": 2.459710121154785, "epoch": 1.17, "step": 104999 }, { "epoch": 1.17, "learning_rate": 8.062102040816328e-05, "loss": 2.4634, "step": 105000 }, { "FLOPS loss": 0.062380943447351456, "L0_d": 942.94, "MLM loss": 2.4872617721557617, "epoch": 1.17, "step": 105499 }, { "epoch": 1.17, "learning_rate": 8.051897959183674e-05, "loss": 2.4589, "step": 105500 }, { "FLOPS loss": 0.05149580538272858, "L0_d": 871.34, "MLM loss": 2.1509368419647217, "epoch": 1.18, "step": 105999 }, { "epoch": 1.18, "learning_rate": 8.041714285714286e-05, "loss": 2.4601, "step": 106000 }, { "FLOPS loss": 0.045978888869285583, "L0_d": 843.78, "MLM loss": 2.396900177001953, "epoch": 1.18, "step": 106499 }, { "epoch": 1.18, "learning_rate": 8.031510204081633e-05, "loss": 2.4594, "step": 106500 }, { "FLOPS loss": 0.0607205294072628, "L0_d": 1005.7, "MLM loss": 2.567096471786499, "epoch": 1.19, "step": 106999 }, { "epoch": 1.19, "learning_rate": 8.02130612244898e-05, "loss": 2.4548, "step": 107000 }, { "FLOPS loss": 0.05772789195179939, "L0_d": 1163.84, "MLM loss": 2.3289132118225098, "epoch": 1.19, "step": 107499 }, { "epoch": 1.19, "learning_rate": 8.011102040816327e-05, "loss": 2.4587, "step": 107500 }, { "FLOPS loss": 0.061777133494615555, "L0_d": 915.8, "MLM loss": 2.299283981323242, "epoch": 1.2, "step": 107999 }, { "epoch": 1.2, "learning_rate": 8.000897959183673e-05, "loss": 2.4587, "step": 108000 }, { "FLOPS loss": 0.05991097912192345, "L0_d": 955.47, "MLM loss": 2.1426010131835938, "epoch": 1.2, "step": 108499 }, { "epoch": 1.2, "learning_rate": 7.990714285714286e-05, "loss": 2.4499, "step": 108500 }, { "FLOPS loss": 0.054815031588077545, "L0_d": 875.98, "MLM loss": 2.507615089416504, "epoch": 1.21, "step": 108999 }, { "epoch": 1.21, "learning_rate": 7.980510204081632e-05, "loss": 2.4466, "step": 109000 }, { "FLOPS loss": 0.05764458328485489, "L0_d": 997.47, "MLM loss": 2.537228584289551, "epoch": 1.22, "step": 109499 }, { "epoch": 1.22, "learning_rate": 7.97030612244898e-05, "loss": 2.4544, "step": 109500 }, { "FLOPS loss": 0.06962741166353226, "L0_d": 1251.38, "MLM loss": 2.4341816902160645, "epoch": 1.22, "step": 109999 }, { "epoch": 1.22, "learning_rate": 7.960102040816328e-05, "loss": 2.4476, "step": 110000 }, { "FLOPS loss": 0.04508056491613388, "L0_d": 1015.05, "MLM loss": 2.5601704120635986, "epoch": 1.23, "step": 110499 }, { "epoch": 1.23, "learning_rate": 7.949918367346938e-05, "loss": 2.4526, "step": 110500 }, { "FLOPS loss": 0.058289121836423874, "L0_d": 949.98, "MLM loss": 2.4111971855163574, "epoch": 1.23, "step": 110999 }, { "epoch": 1.23, "learning_rate": 7.939714285714286e-05, "loss": 2.4489, "step": 111000 }, { "FLOPS loss": 0.04966416954994202, "L0_d": 797.47, "MLM loss": 2.188117742538452, "epoch": 1.24, "step": 111499 }, { "epoch": 1.24, "learning_rate": 7.929510204081633e-05, "loss": 2.4481, "step": 111500 }, { "FLOPS loss": 0.04224289581179619, "L0_d": 718.98, "MLM loss": 2.3695602416992188, "epoch": 1.24, "step": 111999 }, { "epoch": 1.24, "learning_rate": 7.919306122448979e-05, "loss": 2.4421, "step": 112000 }, { "FLOPS loss": 0.06581538170576096, "L0_d": 982.22, "MLM loss": 2.454967737197876, "epoch": 1.25, "step": 112499 }, { "epoch": 1.25, "learning_rate": 7.909102040816327e-05, "loss": 2.4438, "step": 112500 }, { "FLOPS loss": 0.06850145012140274, "L0_d": 1238.27, "MLM loss": 2.422428607940674, "epoch": 1.25, "step": 112999 }, { "epoch": 1.25, "learning_rate": 7.898918367346939e-05, "loss": 2.4425, "step": 113000 }, { "FLOPS loss": 0.06339308619499207, "L0_d": 1150.89, "MLM loss": 2.441134452819824, "epoch": 1.26, "step": 113499 }, { "epoch": 1.26, "learning_rate": 7.888714285714286e-05, "loss": 2.4381, "step": 113500 }, { "FLOPS loss": 0.06040579825639725, "L0_d": 929.83, "MLM loss": 2.2993974685668945, "epoch": 1.27, "step": 113999 }, { "epoch": 1.27, "learning_rate": 7.878510204081633e-05, "loss": 2.4416, "step": 114000 }, { "FLOPS loss": 0.047220006585121155, "L0_d": 909.89, "MLM loss": 2.3804566860198975, "epoch": 1.27, "step": 114499 }, { "epoch": 1.27, "learning_rate": 7.86830612244898e-05, "loss": 2.4417, "step": 114500 }, { "FLOPS loss": 0.06454654783010483, "L0_d": 1075.69, "MLM loss": 2.561215400695801, "epoch": 1.28, "step": 114999 }, { "epoch": 1.28, "learning_rate": 7.858102040816326e-05, "loss": 2.4356, "step": 115000 }, { "FLOPS loss": 0.05715673789381981, "L0_d": 999.84, "MLM loss": 2.44301438331604, "epoch": 1.28, "step": 115499 }, { "epoch": 1.28, "learning_rate": 7.847897959183674e-05, "loss": 2.4372, "step": 115500 }, { "FLOPS loss": 0.058160748332738876, "L0_d": 738.77, "MLM loss": 2.5617055892944336, "epoch": 1.29, "step": 115999 }, { "epoch": 1.29, "learning_rate": 7.837714285714286e-05, "loss": 2.4354, "step": 116000 }, { "FLOPS loss": 0.0651092603802681, "L0_d": 1030.95, "MLM loss": 2.3533689975738525, "epoch": 1.29, "step": 116499 }, { "epoch": 1.29, "learning_rate": 7.827510204081633e-05, "loss": 2.4321, "step": 116500 }, { "FLOPS loss": 0.058790192008018494, "L0_d": 942.95, "MLM loss": 2.5071709156036377, "epoch": 1.3, "step": 116999 }, { "epoch": 1.3, "learning_rate": 7.817306122448979e-05, "loss": 2.4349, "step": 117000 }, { "FLOPS loss": 0.05849099159240723, "L0_d": 875.17, "MLM loss": 2.3198471069335938, "epoch": 1.3, "step": 117499 }, { "epoch": 1.3, "learning_rate": 7.807102040816327e-05, "loss": 2.4309, "step": 117500 }, { "FLOPS loss": 0.06788192689418793, "L0_d": 1271.83, "MLM loss": 2.2371692657470703, "epoch": 1.31, "step": 117999 }, { "epoch": 1.31, "learning_rate": 7.796897959183675e-05, "loss": 2.4331, "step": 118000 }, { "FLOPS loss": 0.07915814965963364, "L0_d": 1088.23, "MLM loss": 2.3536338806152344, "epoch": 1.32, "step": 118499 }, { "epoch": 1.32, "learning_rate": 7.786693877551021e-05, "loss": 2.4267, "step": 118500 }, { "FLOPS loss": 0.047443993389606476, "L0_d": 836.97, "MLM loss": 2.4884071350097656, "epoch": 1.32, "step": 118999 }, { "epoch": 1.32, "learning_rate": 7.776510204081633e-05, "loss": 2.4267, "step": 119000 }, { "FLOPS loss": 0.05022300407290459, "L0_d": 792.25, "MLM loss": 2.4337425231933594, "epoch": 1.33, "step": 119499 }, { "epoch": 1.33, "learning_rate": 7.76630612244898e-05, "loss": 2.4206, "step": 119500 }, { "FLOPS loss": 0.05764727666974068, "L0_d": 1037.53, "MLM loss": 2.4468226432800293, "epoch": 1.33, "step": 119999 }, { "epoch": 1.33, "learning_rate": 7.756102040816326e-05, "loss": 2.4264, "step": 120000 }, { "FLOPS loss": 0.048015046864748, "L0_d": 709.09, "MLM loss": 2.346395492553711, "epoch": 1.34, "step": 120499 }, { "epoch": 1.34, "learning_rate": 7.745897959183673e-05, "loss": 2.4233, "step": 120500 }, { "FLOPS loss": 0.047082386910915375, "L0_d": 828.02, "MLM loss": 2.211945056915283, "epoch": 1.34, "step": 120999 }, { "epoch": 1.34, "learning_rate": 7.735714285714286e-05, "loss": 2.4248, "step": 121000 }, { "FLOPS loss": 0.07244478911161423, "L0_d": 1244.89, "MLM loss": 2.3993351459503174, "epoch": 1.35, "step": 121499 }, { "epoch": 1.35, "learning_rate": 7.725510204081633e-05, "loss": 2.422, "step": 121500 }, { "FLOPS loss": 0.05349961295723915, "L0_d": 1091.36, "MLM loss": 2.2384302616119385, "epoch": 1.35, "step": 121999 }, { "epoch": 1.35, "learning_rate": 7.71530612244898e-05, "loss": 2.4209, "step": 122000 }, { "FLOPS loss": 0.06087294593453407, "L0_d": 972.55, "MLM loss": 2.5149126052856445, "epoch": 1.36, "step": 122499 }, { "epoch": 1.36, "learning_rate": 7.705102040816327e-05, "loss": 2.4214, "step": 122500 }, { "FLOPS loss": 0.0475175641477108, "L0_d": 819.0, "MLM loss": 2.323509693145752, "epoch": 1.37, "step": 122999 }, { "epoch": 1.37, "learning_rate": 7.694897959183673e-05, "loss": 2.4217, "step": 123000 }, { "FLOPS loss": 0.06220564991235733, "L0_d": 1108.77, "MLM loss": 2.431415319442749, "epoch": 1.37, "step": 123499 }, { "epoch": 1.37, "learning_rate": 7.684714285714287e-05, "loss": 2.4184, "step": 123500 }, { "FLOPS loss": 0.055600717663764954, "L0_d": 877.47, "MLM loss": 2.3827061653137207, "epoch": 1.38, "step": 123999 }, { "epoch": 1.38, "learning_rate": 7.674510204081633e-05, "loss": 2.4114, "step": 124000 }, { "FLOPS loss": 0.0523659884929657, "L0_d": 956.22, "MLM loss": 2.3927161693573, "epoch": 1.38, "step": 124499 }, { "epoch": 1.38, "learning_rate": 7.66430612244898e-05, "loss": 2.416, "step": 124500 }, { "FLOPS loss": 0.05690861493349075, "L0_d": 1023.48, "MLM loss": 2.380366802215576, "epoch": 1.39, "step": 124999 }, { "epoch": 1.39, "learning_rate": 7.654102040816326e-05, "loss": 2.4103, "step": 125000 }, { "FLOPS loss": 0.06576231867074966, "L0_d": 884.03, "MLM loss": 1.9601092338562012, "epoch": 1.39, "step": 125499 }, { "epoch": 1.39, "learning_rate": 7.64391836734694e-05, "loss": 2.4152, "step": 125500 }, { "FLOPS loss": 0.05620851367712021, "L0_d": 1105.39, "MLM loss": 2.4658303260803223, "epoch": 1.4, "step": 125999 }, { "epoch": 1.4, "learning_rate": 7.633714285714286e-05, "loss": 2.4143, "step": 126000 }, { "FLOPS loss": 0.046832144260406494, "L0_d": 791.67, "MLM loss": 2.4529666900634766, "epoch": 1.4, "step": 126499 }, { "epoch": 1.4, "learning_rate": 7.623510204081633e-05, "loss": 2.4059, "step": 126500 }, { "FLOPS loss": 0.055598534643650055, "L0_d": 810.78, "MLM loss": 2.3404593467712402, "epoch": 1.41, "step": 126999 }, { "epoch": 1.41, "learning_rate": 7.61330612244898e-05, "loss": 2.4146, "step": 127000 }, { "FLOPS loss": 0.06119697540998459, "L0_d": 896.62, "MLM loss": 2.4626617431640625, "epoch": 1.41, "step": 127499 }, { "epoch": 1.41, "learning_rate": 7.603122448979592e-05, "loss": 2.4037, "step": 127500 }, { "FLOPS loss": 0.05591664835810661, "L0_d": 961.5, "MLM loss": 2.202563762664795, "epoch": 1.42, "step": 127999 }, { "epoch": 1.42, "learning_rate": 7.592918367346939e-05, "loss": 2.4098, "step": 128000 }, { "FLOPS loss": 0.05877549201250076, "L0_d": 992.0, "MLM loss": 2.315680503845215, "epoch": 1.43, "step": 128499 }, { "epoch": 1.43, "learning_rate": 7.582714285714287e-05, "loss": 2.4102, "step": 128500 }, { "FLOPS loss": 0.059685565531253815, "L0_d": 969.73, "MLM loss": 2.3781557083129883, "epoch": 1.43, "step": 128999 }, { "epoch": 1.43, "learning_rate": 7.572510204081633e-05, "loss": 2.4088, "step": 129000 }, { "FLOPS loss": 0.06136553734540939, "L0_d": 1121.27, "MLM loss": 2.2019951343536377, "epoch": 1.44, "step": 129499 }, { "epoch": 1.44, "learning_rate": 7.562326530612245e-05, "loss": 2.4062, "step": 129500 }, { "FLOPS loss": 0.07830975949764252, "L0_d": 1312.2, "MLM loss": 2.3899996280670166, "epoch": 1.44, "step": 129999 }, { "epoch": 1.44, "learning_rate": 7.552122448979592e-05, "loss": 2.408, "step": 130000 }, { "FLOPS loss": 0.050518009811639786, "L0_d": 1014.23, "MLM loss": 2.283228874206543, "epoch": 1.45, "step": 130499 }, { "epoch": 1.45, "learning_rate": 7.54191836734694e-05, "loss": 2.4039, "step": 130500 }, { "FLOPS loss": 0.04859454184770584, "L0_d": 916.28, "MLM loss": 2.256866216659546, "epoch": 1.45, "step": 130999 }, { "epoch": 1.45, "learning_rate": 7.531714285714286e-05, "loss": 2.4037, "step": 131000 }, { "FLOPS loss": 0.06589575111865997, "L0_d": 1096.56, "MLM loss": 2.4545130729675293, "epoch": 1.46, "step": 131499 }, { "epoch": 1.46, "learning_rate": 7.521510204081633e-05, "loss": 2.4047, "step": 131500 }, { "FLOPS loss": 0.05765146389603615, "L0_d": 905.36, "MLM loss": 2.353822708129883, "epoch": 1.46, "step": 131999 }, { "epoch": 1.46, "learning_rate": 7.51130612244898e-05, "loss": 2.4014, "step": 132000 }, { "FLOPS loss": 0.05679977312684059, "L0_d": 888.11, "MLM loss": 2.3280534744262695, "epoch": 1.47, "step": 132499 }, { "epoch": 1.47, "learning_rate": 7.501122448979591e-05, "loss": 2.4002, "step": 132500 }, { "FLOPS loss": 0.040607087314128876, "L0_d": 721.09, "MLM loss": 2.594395399093628, "epoch": 1.48, "step": 132999 }, { "epoch": 1.48, "learning_rate": 7.49091836734694e-05, "loss": 2.4027, "step": 133000 }, { "FLOPS loss": 0.044703006744384766, "L0_d": 649.12, "MLM loss": 2.31876802444458, "epoch": 1.48, "step": 133499 }, { "epoch": 1.48, "learning_rate": 7.480714285714287e-05, "loss": 2.4032, "step": 133500 }, { "FLOPS loss": 0.05919651314616203, "L0_d": 823.42, "MLM loss": 2.4674229621887207, "epoch": 1.49, "step": 133999 }, { "epoch": 1.49, "learning_rate": 7.470510204081633e-05, "loss": 2.3925, "step": 134000 }, { "FLOPS loss": 0.04900096356868744, "L0_d": 804.48, "MLM loss": 2.207056999206543, "epoch": 1.49, "step": 134499 }, { "epoch": 1.49, "learning_rate": 7.460326530612245e-05, "loss": 2.3962, "step": 134500 }, { "FLOPS loss": 0.059831418097019196, "L0_d": 950.27, "MLM loss": 2.32120418548584, "epoch": 1.5, "step": 134999 }, { "epoch": 1.5, "learning_rate": 7.450122448979592e-05, "loss": 2.3969, "step": 135000 }, { "FLOPS loss": 0.04586287960410118, "L0_d": 674.95, "MLM loss": 2.454714298248291, "epoch": 1.5, "step": 135499 }, { "epoch": 1.5, "learning_rate": 7.439918367346938e-05, "loss": 2.3921, "step": 135500 }, { "FLOPS loss": 0.05870771408081055, "L0_d": 813.62, "MLM loss": 2.2865943908691406, "epoch": 1.51, "step": 135999 }, { "epoch": 1.51, "learning_rate": 7.429714285714286e-05, "loss": 2.3934, "step": 136000 }, { "FLOPS loss": 0.054652053862810135, "L0_d": 868.48, "MLM loss": 2.2066802978515625, "epoch": 1.51, "step": 136499 }, { "epoch": 1.51, "learning_rate": 7.419530612244898e-05, "loss": 2.3958, "step": 136500 }, { "FLOPS loss": 0.059165891259908676, "L0_d": 922.73, "MLM loss": 2.372460126876831, "epoch": 1.52, "step": 136999 }, { "epoch": 1.52, "learning_rate": 7.409326530612245e-05, "loss": 2.3927, "step": 137000 }, { "FLOPS loss": 0.06188426911830902, "L0_d": 1192.22, "MLM loss": 2.4264793395996094, "epoch": 1.53, "step": 137499 }, { "epoch": 1.53, "learning_rate": 7.399122448979592e-05, "loss": 2.3914, "step": 137500 }, { "FLOPS loss": 0.0587141327559948, "L0_d": 911.11, "MLM loss": 2.366276264190674, "epoch": 1.53, "step": 137999 }, { "epoch": 1.53, "learning_rate": 7.388918367346939e-05, "loss": 2.39, "step": 138000 }, { "FLOPS loss": 0.05421224981546402, "L0_d": 825.61, "MLM loss": 2.2654175758361816, "epoch": 1.54, "step": 138499 }, { "epoch": 1.54, "learning_rate": 7.378714285714287e-05, "loss": 2.3901, "step": 138500 }, { "FLOPS loss": 0.05685987323522568, "L0_d": 870.36, "MLM loss": 2.435506820678711, "epoch": 1.54, "step": 138999 }, { "epoch": 1.54, "learning_rate": 7.368530612244899e-05, "loss": 2.389, "step": 139000 }, { "FLOPS loss": 0.05553717538714409, "L0_d": 826.09, "MLM loss": 2.2316811084747314, "epoch": 1.55, "step": 139499 }, { "epoch": 1.55, "learning_rate": 7.358326530612245e-05, "loss": 2.3889, "step": 139500 }, { "FLOPS loss": 0.06748484075069427, "L0_d": 1125.31, "MLM loss": 2.3541133403778076, "epoch": 1.55, "step": 139999 }, { "epoch": 1.55, "learning_rate": 7.348122448979592e-05, "loss": 2.3884, "step": 140000 }, { "FLOPS loss": 0.06781161576509476, "L0_d": 898.75, "MLM loss": 2.2068657875061035, "epoch": 1.56, "step": 140499 }, { "epoch": 1.56, "learning_rate": 7.337918367346938e-05, "loss": 2.3837, "step": 140500 }, { "FLOPS loss": 0.04954715818166733, "L0_d": 833.39, "MLM loss": 2.2823896408081055, "epoch": 1.56, "step": 140999 }, { "epoch": 1.56, "learning_rate": 7.327714285714286e-05, "loss": 2.3846, "step": 141000 }, { "FLOPS loss": 0.041550811380147934, "L0_d": 685.98, "MLM loss": 2.3595311641693115, "epoch": 1.57, "step": 141499 }, { "epoch": 1.57, "learning_rate": 7.317530612244898e-05, "loss": 2.3774, "step": 141500 }, { "FLOPS loss": 0.05444991961121559, "L0_d": 864.27, "MLM loss": 2.2505102157592773, "epoch": 1.58, "step": 141999 }, { "epoch": 1.58, "learning_rate": 7.307326530612245e-05, "loss": 2.3848, "step": 142000 }, { "FLOPS loss": 0.07878141105175018, "L0_d": 1153.94, "MLM loss": 2.34080171585083, "epoch": 1.58, "step": 142499 }, { "epoch": 1.58, "learning_rate": 7.297122448979592e-05, "loss": 2.3803, "step": 142500 }, { "FLOPS loss": 0.040897272527217865, "L0_d": 729.64, "MLM loss": 2.431034564971924, "epoch": 1.59, "step": 142999 }, { "epoch": 1.59, "learning_rate": 7.286918367346939e-05, "loss": 2.3822, "step": 143000 }, { "FLOPS loss": 0.06469119340181351, "L0_d": 1041.81, "MLM loss": 2.3280398845672607, "epoch": 1.59, "step": 143499 }, { "epoch": 1.59, "learning_rate": 7.276714285714285e-05, "loss": 2.3822, "step": 143500 }, { "FLOPS loss": 0.05441708117723465, "L0_d": 890.61, "MLM loss": 2.393052339553833, "epoch": 1.6, "step": 143999 }, { "epoch": 1.6, "learning_rate": 7.266530612244899e-05, "loss": 2.3805, "step": 144000 }, { "FLOPS loss": 0.047435831278562546, "L0_d": 1026.31, "MLM loss": 2.291907548904419, "epoch": 1.6, "step": 144499 }, { "epoch": 1.6, "learning_rate": 7.256326530612245e-05, "loss": 2.3795, "step": 144500 }, { "FLOPS loss": 0.051319804042577744, "L0_d": 953.59, "MLM loss": 2.2784931659698486, "epoch": 1.61, "step": 144999 }, { "epoch": 1.61, "learning_rate": 7.246122448979592e-05, "loss": 2.3739, "step": 145000 }, { "FLOPS loss": 0.05857393145561218, "L0_d": 1010.31, "MLM loss": 2.5369725227355957, "epoch": 1.61, "step": 145499 }, { "epoch": 1.61, "learning_rate": 7.23591836734694e-05, "loss": 2.3754, "step": 145500 }, { "FLOPS loss": 0.050010714679956436, "L0_d": 864.45, "MLM loss": 2.249300956726074, "epoch": 1.62, "step": 145999 }, { "epoch": 1.62, "learning_rate": 7.225734693877552e-05, "loss": 2.3742, "step": 146000 }, { "FLOPS loss": 0.0592932365834713, "L0_d": 894.97, "MLM loss": 2.320887565612793, "epoch": 1.63, "step": 146499 }, { "epoch": 1.63, "learning_rate": 7.215530612244898e-05, "loss": 2.372, "step": 146500 }, { "FLOPS loss": 0.07161041349172592, "L0_d": 947.64, "MLM loss": 2.266634464263916, "epoch": 1.63, "step": 146999 }, { "epoch": 1.63, "learning_rate": 7.205326530612246e-05, "loss": 2.372, "step": 147000 }, { "FLOPS loss": 0.04668963700532913, "L0_d": 843.88, "MLM loss": 2.2491867542266846, "epoch": 1.64, "step": 147499 }, { "epoch": 1.64, "learning_rate": 7.195122448979592e-05, "loss": 2.371, "step": 147500 }, { "FLOPS loss": 0.05300503224134445, "L0_d": 850.39, "MLM loss": 2.54996395111084, "epoch": 1.64, "step": 147999 }, { "epoch": 1.64, "learning_rate": 7.184938775510204e-05, "loss": 2.368, "step": 148000 }, { "FLOPS loss": 0.06603309512138367, "L0_d": 1049.52, "MLM loss": 2.3379313945770264, "epoch": 1.65, "step": 148499 }, { "epoch": 1.65, "learning_rate": 7.174734693877552e-05, "loss": 2.3689, "step": 148500 }, { "FLOPS loss": 0.049901194870471954, "L0_d": 757.91, "MLM loss": 2.2603020668029785, "epoch": 1.65, "step": 148999 }, { "epoch": 1.65, "learning_rate": 7.164551020408163e-05, "loss": 2.3688, "step": 149000 }, { "FLOPS loss": 0.05954191833734512, "L0_d": 1151.7, "MLM loss": 2.401437282562256, "epoch": 1.66, "step": 149499 }, { "epoch": 1.66, "learning_rate": 7.154346938775511e-05, "loss": 2.3678, "step": 149500 }, { "FLOPS loss": 0.05292873829603195, "L0_d": 924.94, "MLM loss": 2.3402490615844727, "epoch": 1.66, "step": 149999 }, { "epoch": 1.66, "learning_rate": 7.144142857142857e-05, "loss": 2.3702, "step": 150000 }, { "FLOPS loss": 0.06479555368423462, "L0_d": 912.72, "MLM loss": 2.2263669967651367, "epoch": 1.67, "step": 150499 }, { "epoch": 1.67, "learning_rate": 7.133938775510205e-05, "loss": 2.3668, "step": 150500 }, { "FLOPS loss": 0.0632433220744133, "L0_d": 1091.19, "MLM loss": 2.389559030532837, "epoch": 1.68, "step": 150999 }, { "epoch": 1.68, "learning_rate": 7.123734693877552e-05, "loss": 2.3642, "step": 151000 }, { "FLOPS loss": 0.04530233144760132, "L0_d": 684.45, "MLM loss": 2.3026747703552246, "epoch": 1.68, "step": 151499 }, { "epoch": 1.68, "learning_rate": 7.113530612244898e-05, "loss": 2.3629, "step": 151500 }, { "FLOPS loss": 0.05753832310438156, "L0_d": 853.69, "MLM loss": 2.2215981483459473, "epoch": 1.69, "step": 151999 }, { "epoch": 1.69, "learning_rate": 7.103326530612246e-05, "loss": 2.3639, "step": 152000 }, { "FLOPS loss": 0.05592764914035797, "L0_d": 818.31, "MLM loss": 2.249408483505249, "epoch": 1.69, "step": 152499 }, { "epoch": 1.69, "learning_rate": 7.093122448979592e-05, "loss": 2.365, "step": 152500 }, { "FLOPS loss": 0.057692211121320724, "L0_d": 892.7, "MLM loss": 2.1683812141418457, "epoch": 1.7, "step": 152999 }, { "epoch": 1.7, "learning_rate": 7.082938775510204e-05, "loss": 2.3645, "step": 153000 }, { "FLOPS loss": 0.07237580418586731, "L0_d": 1128.23, "MLM loss": 2.3035595417022705, "epoch": 1.7, "step": 153499 }, { "epoch": 1.7, "learning_rate": 7.072734693877552e-05, "loss": 2.3598, "step": 153500 }, { "FLOPS loss": 0.044418223202228546, "L0_d": 640.14, "MLM loss": 2.150951623916626, "epoch": 1.71, "step": 153999 }, { "epoch": 1.71, "learning_rate": 7.062530612244899e-05, "loss": 2.3617, "step": 154000 }, { "FLOPS loss": 0.06442984193563461, "L0_d": 1061.12, "MLM loss": 2.460023880004883, "epoch": 1.71, "step": 154499 }, { "epoch": 1.71, "learning_rate": 7.052326530612245e-05, "loss": 2.3555, "step": 154500 }, { "FLOPS loss": 0.04964172840118408, "L0_d": 770.52, "MLM loss": 2.370302200317383, "epoch": 1.72, "step": 154999 }, { "epoch": 1.72, "learning_rate": 7.042122448979593e-05, "loss": 2.3581, "step": 155000 }, { "FLOPS loss": 0.06309588253498077, "L0_d": 1050.2, "MLM loss": 2.179468870162964, "epoch": 1.73, "step": 155499 }, { "epoch": 1.73, "learning_rate": 7.031938775510204e-05, "loss": 2.3627, "step": 155500 }, { "FLOPS loss": 0.057224247604608536, "L0_d": 829.45, "MLM loss": 2.290663719177246, "epoch": 1.73, "step": 155999 }, { "epoch": 1.73, "learning_rate": 7.021734693877551e-05, "loss": 2.3548, "step": 156000 }, { "FLOPS loss": 0.07096954435110092, "L0_d": 1124.5, "MLM loss": 2.3333535194396973, "epoch": 1.74, "step": 156499 }, { "epoch": 1.74, "learning_rate": 7.011530612244899e-05, "loss": 2.3589, "step": 156500 }, { "FLOPS loss": 0.05174532160162926, "L0_d": 795.05, "MLM loss": 2.4752039909362793, "epoch": 1.74, "step": 156999 }, { "epoch": 1.74, "learning_rate": 7.001326530612246e-05, "loss": 2.3555, "step": 157000 }, { "FLOPS loss": 0.06745199114084244, "L0_d": 1105.53, "MLM loss": 2.2967159748077393, "epoch": 1.75, "step": 157499 }, { "epoch": 1.75, "learning_rate": 6.991122448979592e-05, "loss": 2.3567, "step": 157500 }, { "FLOPS loss": 0.05396946147084236, "L0_d": 801.8, "MLM loss": 2.258889675140381, "epoch": 1.75, "step": 157999 }, { "epoch": 1.75, "learning_rate": 6.980938775510204e-05, "loss": 2.3518, "step": 158000 }, { "FLOPS loss": 0.05986456200480461, "L0_d": 1007.48, "MLM loss": 2.3621294498443604, "epoch": 1.76, "step": 158499 }, { "epoch": 1.76, "learning_rate": 6.970734693877551e-05, "loss": 2.3567, "step": 158500 }, { "FLOPS loss": 0.059486862272024155, "L0_d": 1016.73, "MLM loss": 2.26704478263855, "epoch": 1.76, "step": 158999 }, { "epoch": 1.76, "learning_rate": 6.960530612244899e-05, "loss": 2.3512, "step": 159000 }, { "FLOPS loss": 0.06300424784421921, "L0_d": 1016.45, "MLM loss": 2.3884778022766113, "epoch": 1.77, "step": 159499 }, { "epoch": 1.77, "learning_rate": 6.950326530612245e-05, "loss": 2.3546, "step": 159500 }, { "FLOPS loss": 0.04583696275949478, "L0_d": 842.11, "MLM loss": 2.2775931358337402, "epoch": 1.78, "step": 159999 }, { "epoch": 1.78, "learning_rate": 6.940142857142857e-05, "loss": 2.3491, "step": 160000 }, { "FLOPS loss": 0.05980045720934868, "L0_d": 886.28, "MLM loss": 2.3207318782806396, "epoch": 1.78, "step": 160499 }, { "epoch": 1.78, "learning_rate": 6.929938775510204e-05, "loss": 2.3556, "step": 160500 }, { "FLOPS loss": 0.062243539839982986, "L0_d": 986.33, "MLM loss": 2.456385850906372, "epoch": 1.79, "step": 160999 }, { "epoch": 1.79, "learning_rate": 6.919734693877551e-05, "loss": 2.35, "step": 161000 }, { "FLOPS loss": 0.04177238792181015, "L0_d": 839.23, "MLM loss": 2.0877509117126465, "epoch": 1.79, "step": 161499 }, { "epoch": 1.79, "learning_rate": 6.909530612244899e-05, "loss": 2.3476, "step": 161500 }, { "FLOPS loss": 0.0654568001627922, "L0_d": 1033.69, "MLM loss": 2.288259267807007, "epoch": 1.8, "step": 161999 }, { "epoch": 1.8, "learning_rate": 6.89934693877551e-05, "loss": 2.3504, "step": 162000 }, { "FLOPS loss": 0.05908384919166565, "L0_d": 1152.55, "MLM loss": 2.3739535808563232, "epoch": 1.8, "step": 162499 }, { "epoch": 1.8, "learning_rate": 6.889142857142858e-05, "loss": 2.3493, "step": 162500 }, { "FLOPS loss": 0.0644502118229866, "L0_d": 818.25, "MLM loss": 2.2964179515838623, "epoch": 1.81, "step": 162999 }, { "epoch": 1.81, "learning_rate": 6.878938775510204e-05, "loss": 2.3488, "step": 163000 }, { "FLOPS loss": 0.061816513538360596, "L0_d": 980.7, "MLM loss": 2.315463066101074, "epoch": 1.81, "step": 163499 }, { "epoch": 1.81, "learning_rate": 6.868734693877551e-05, "loss": 2.3417, "step": 163500 }, { "FLOPS loss": 0.0677734836935997, "L0_d": 1017.97, "MLM loss": 2.390178918838501, "epoch": 1.82, "step": 163999 }, { "epoch": 1.82, "learning_rate": 6.858530612244897e-05, "loss": 2.3506, "step": 164000 }, { "FLOPS loss": 0.060079894959926605, "L0_d": 1071.78, "MLM loss": 2.1683859825134277, "epoch": 1.83, "step": 164499 }, { "epoch": 1.83, "learning_rate": 6.84834693877551e-05, "loss": 2.3437, "step": 164500 }, { "FLOPS loss": 0.054492004215717316, "L0_d": 792.84, "MLM loss": 2.3717727661132812, "epoch": 1.83, "step": 164999 }, { "epoch": 1.83, "learning_rate": 6.838142857142857e-05, "loss": 2.3519, "step": 165000 }, { "FLOPS loss": 0.06225438043475151, "L0_d": 1095.06, "MLM loss": 2.20212984085083, "epoch": 1.84, "step": 165499 }, { "epoch": 1.84, "learning_rate": 6.827938775510204e-05, "loss": 2.3418, "step": 165500 }, { "FLOPS loss": 0.05998671054840088, "L0_d": 870.19, "MLM loss": 2.3553786277770996, "epoch": 1.84, "step": 165999 }, { "epoch": 1.84, "learning_rate": 6.817734693877551e-05, "loss": 2.3406, "step": 166000 }, { "FLOPS loss": 0.05350930616259575, "L0_d": 911.39, "MLM loss": 2.0919508934020996, "epoch": 1.85, "step": 166499 }, { "epoch": 1.85, "learning_rate": 6.807551020408163e-05, "loss": 2.3425, "step": 166500 }, { "FLOPS loss": 0.05402107909321785, "L0_d": 822.58, "MLM loss": 2.3293726444244385, "epoch": 1.85, "step": 166999 }, { "epoch": 1.85, "learning_rate": 6.797346938775511e-05, "loss": 2.3395, "step": 167000 }, { "FLOPS loss": 0.058944445103406906, "L0_d": 944.94, "MLM loss": 2.1397345066070557, "epoch": 1.86, "step": 167499 }, { "epoch": 1.86, "learning_rate": 6.787142857142858e-05, "loss": 2.3407, "step": 167500 }, { "FLOPS loss": 0.062381766736507416, "L0_d": 924.25, "MLM loss": 2.125985622406006, "epoch": 1.86, "step": 167999 }, { "epoch": 1.86, "learning_rate": 6.776938775510204e-05, "loss": 2.3371, "step": 168000 }, { "FLOPS loss": 0.03963374346494675, "L0_d": 523.89, "MLM loss": 2.1006557941436768, "epoch": 1.87, "step": 168499 }, { "epoch": 1.87, "learning_rate": 6.766734693877551e-05, "loss": 2.3382, "step": 168500 }, { "FLOPS loss": 0.058371152728796005, "L0_d": 919.62, "MLM loss": 2.212615728378296, "epoch": 1.88, "step": 168999 }, { "epoch": 1.88, "learning_rate": 6.756551020408164e-05, "loss": 2.3375, "step": 169000 }, { "FLOPS loss": 0.05565766617655754, "L0_d": 1250.36, "MLM loss": 2.3127620220184326, "epoch": 1.88, "step": 169499 }, { "epoch": 1.88, "learning_rate": 6.74634693877551e-05, "loss": 2.3409, "step": 169500 }, { "FLOPS loss": 0.06251253932714462, "L0_d": 855.45, "MLM loss": 2.2198729515075684, "epoch": 1.89, "step": 169999 }, { "epoch": 1.89, "learning_rate": 6.736142857142857e-05, "loss": 2.3354, "step": 170000 }, { "FLOPS loss": 0.05792997404932976, "L0_d": 1057.67, "MLM loss": 2.2349019050598145, "epoch": 1.89, "step": 170499 }, { "epoch": 1.89, "learning_rate": 6.725938775510205e-05, "loss": 2.3362, "step": 170500 }, { "FLOPS loss": 0.04008607938885689, "L0_d": 763.42, "MLM loss": 2.175825357437134, "epoch": 1.9, "step": 170999 }, { "epoch": 1.9, "learning_rate": 6.715755102040817e-05, "loss": 2.3351, "step": 171000 }, { "FLOPS loss": 0.05475396662950516, "L0_d": 912.17, "MLM loss": 2.2616868019104004, "epoch": 1.9, "step": 171499 }, { "epoch": 1.9, "learning_rate": 6.705551020408163e-05, "loss": 2.3357, "step": 171500 }, { "FLOPS loss": 0.06247398629784584, "L0_d": 871.94, "MLM loss": 2.288843870162964, "epoch": 1.91, "step": 171999 }, { "epoch": 1.91, "learning_rate": 6.695346938775511e-05, "loss": 2.3371, "step": 172000 }, { "FLOPS loss": 0.047593921422958374, "L0_d": 804.69, "MLM loss": 2.204817771911621, "epoch": 1.91, "step": 172499 }, { "epoch": 1.91, "learning_rate": 6.685142857142858e-05, "loss": 2.335, "step": 172500 }, { "FLOPS loss": 0.05214070901274681, "L0_d": 821.36, "MLM loss": 2.283116579055786, "epoch": 1.92, "step": 172999 }, { "epoch": 1.92, "learning_rate": 6.67495918367347e-05, "loss": 2.3379, "step": 173000 }, { "FLOPS loss": 0.06587202847003937, "L0_d": 1045.66, "MLM loss": 2.385770797729492, "epoch": 1.93, "step": 173499 }, { "epoch": 1.93, "learning_rate": 6.664755102040816e-05, "loss": 2.335, "step": 173500 }, { "FLOPS loss": 0.06492248177528381, "L0_d": 1014.66, "MLM loss": 2.3243205547332764, "epoch": 1.93, "step": 173999 }, { "epoch": 1.93, "learning_rate": 6.654551020408164e-05, "loss": 2.3323, "step": 174000 }, { "FLOPS loss": 0.05745987221598625, "L0_d": 854.84, "MLM loss": 2.2145919799804688, "epoch": 1.94, "step": 174499 }, { "epoch": 1.94, "learning_rate": 6.64434693877551e-05, "loss": 2.3357, "step": 174500 }, { "FLOPS loss": 0.05205972120165825, "L0_d": 1022.62, "MLM loss": 2.448801040649414, "epoch": 1.94, "step": 174999 }, { "epoch": 1.94, "learning_rate": 6.634163265306123e-05, "loss": 2.332, "step": 175000 }, { "FLOPS loss": 0.05591163784265518, "L0_d": 1097.02, "MLM loss": 2.198014974594116, "epoch": 1.95, "step": 175499 }, { "epoch": 1.95, "learning_rate": 6.623959183673469e-05, "loss": 2.3321, "step": 175500 }, { "FLOPS loss": 0.05115339159965515, "L0_d": 881.53, "MLM loss": 2.1630940437316895, "epoch": 1.95, "step": 175999 }, { "epoch": 1.95, "learning_rate": 6.613755102040816e-05, "loss": 2.3309, "step": 176000 }, { "FLOPS loss": 0.055124856531620026, "L0_d": 797.61, "MLM loss": 2.266045093536377, "epoch": 1.96, "step": 176499 }, { "epoch": 1.96, "learning_rate": 6.603551020408165e-05, "loss": 2.3311, "step": 176500 }, { "FLOPS loss": 0.049748100340366364, "L0_d": 1038.91, "MLM loss": 2.461071491241455, "epoch": 1.96, "step": 176999 }, { "epoch": 1.96, "learning_rate": 6.593346938775511e-05, "loss": 2.3288, "step": 177000 }, { "FLOPS loss": 0.049052782356739044, "L0_d": 870.12, "MLM loss": 2.2625961303710938, "epoch": 1.97, "step": 177499 }, { "epoch": 1.97, "learning_rate": 6.583163265306123e-05, "loss": 2.3276, "step": 177500 }, { "FLOPS loss": 0.06075240299105644, "L0_d": 1010.81, "MLM loss": 2.2992048263549805, "epoch": 1.98, "step": 177999 }, { "epoch": 1.98, "learning_rate": 6.57295918367347e-05, "loss": 2.3239, "step": 178000 }, { "FLOPS loss": 0.061753179877996445, "L0_d": 926.36, "MLM loss": 2.271366596221924, "epoch": 1.98, "step": 178499 }, { "epoch": 1.98, "learning_rate": 6.562755102040816e-05, "loss": 2.3278, "step": 178500 }, { "FLOPS loss": 0.047498561441898346, "L0_d": 754.09, "MLM loss": 2.2163281440734863, "epoch": 1.99, "step": 178999 }, { "epoch": 1.99, "learning_rate": 6.552551020408163e-05, "loss": 2.3276, "step": 179000 }, { "FLOPS loss": 0.06929969042539597, "L0_d": 1038.48, "MLM loss": 2.3445746898651123, "epoch": 1.99, "step": 179499 }, { "epoch": 1.99, "learning_rate": 6.542367346938776e-05, "loss": 2.3256, "step": 179500 }, { "FLOPS loss": 0.05312717333436012, "L0_d": 894.47, "MLM loss": 2.1745967864990234, "epoch": 2.0, "step": 179999 }, { "epoch": 2.0, "learning_rate": 6.532163265306123e-05, "loss": 2.3225, "step": 180000 }, { "FLOPS loss": 0.060288943350315094, "L0_d": 1007.09, "MLM loss": 2.2938647270202637, "epoch": 2.0, "step": 180499 }, { "epoch": 2.0, "learning_rate": 6.521959183673469e-05, "loss": 2.3205, "step": 180500 }, { "FLOPS loss": 0.053670670837163925, "L0_d": 820.48, "MLM loss": 2.2767763137817383, "epoch": 2.01, "step": 180999 }, { "epoch": 2.01, "learning_rate": 6.511755102040817e-05, "loss": 2.3219, "step": 181000 }, { "FLOPS loss": 0.06285814940929413, "L0_d": 1101.7, "MLM loss": 2.3793036937713623, "epoch": 2.01, "step": 181499 }, { "epoch": 2.01, "learning_rate": 6.501571428571429e-05, "loss": 2.3188, "step": 181500 }, { "FLOPS loss": 0.06652138382196426, "L0_d": 1146.67, "MLM loss": 2.466308116912842, "epoch": 2.02, "step": 181999 }, { "epoch": 2.02, "learning_rate": 6.491367346938775e-05, "loss": 2.3168, "step": 182000 }, { "FLOPS loss": 0.04534171149134636, "L0_d": 667.5, "MLM loss": 2.177887439727783, "epoch": 2.03, "step": 182499 }, { "epoch": 2.03, "learning_rate": 6.481163265306123e-05, "loss": 2.3219, "step": 182500 }, { "FLOPS loss": 0.05080340430140495, "L0_d": 990.16, "MLM loss": 2.230391502380371, "epoch": 2.03, "step": 182999 }, { "epoch": 2.03, "learning_rate": 6.47095918367347e-05, "loss": 2.3202, "step": 183000 }, { "FLOPS loss": 0.06440580636262894, "L0_d": 1080.48, "MLM loss": 2.2491097450256348, "epoch": 2.04, "step": 183499 }, { "epoch": 2.04, "learning_rate": 6.460755102040816e-05, "loss": 2.3213, "step": 183500 }, { "FLOPS loss": 0.06828920543193817, "L0_d": 1214.25, "MLM loss": 2.1256892681121826, "epoch": 2.04, "step": 183999 }, { "epoch": 2.04, "learning_rate": 6.45057142857143e-05, "loss": 2.3173, "step": 184000 }, { "FLOPS loss": 0.06206744164228439, "L0_d": 928.86, "MLM loss": 2.327916145324707, "epoch": 2.05, "step": 184499 }, { "epoch": 2.05, "learning_rate": 6.440367346938776e-05, "loss": 2.3121, "step": 184500 }, { "FLOPS loss": 0.05971044301986694, "L0_d": 1026.25, "MLM loss": 2.4267261028289795, "epoch": 2.05, "step": 184999 }, { "epoch": 2.05, "learning_rate": 6.430163265306123e-05, "loss": 2.3156, "step": 185000 }, { "FLOPS loss": 0.06181873753666878, "L0_d": 1011.67, "MLM loss": 2.2486371994018555, "epoch": 2.06, "step": 185499 }, { "epoch": 2.06, "learning_rate": 6.419959183673469e-05, "loss": 2.3161, "step": 185500 }, { "FLOPS loss": 0.05726119130849838, "L0_d": 1031.81, "MLM loss": 2.1840081214904785, "epoch": 2.06, "step": 185999 }, { "epoch": 2.06, "learning_rate": 6.409755102040817e-05, "loss": 2.3159, "step": 186000 }, { "FLOPS loss": 0.04867648333311081, "L0_d": 830.73, "MLM loss": 2.256422758102417, "epoch": 2.07, "step": 186499 }, { "epoch": 2.07, "learning_rate": 6.399571428571429e-05, "loss": 2.308, "step": 186500 }, { "FLOPS loss": 0.06085360050201416, "L0_d": 896.8, "MLM loss": 2.2740402221679688, "epoch": 2.08, "step": 186999 }, { "epoch": 2.08, "learning_rate": 6.389367346938775e-05, "loss": 2.3114, "step": 187000 }, { "FLOPS loss": 0.05795580893754959, "L0_d": 1208.81, "MLM loss": 2.5151615142822266, "epoch": 2.08, "step": 187499 }, { "epoch": 2.08, "learning_rate": 6.379163265306123e-05, "loss": 2.3177, "step": 187500 }, { "FLOPS loss": 0.048575032502412796, "L0_d": 920.72, "MLM loss": 2.444376230239868, "epoch": 2.09, "step": 187999 }, { "epoch": 2.09, "learning_rate": 6.36895918367347e-05, "loss": 2.3157, "step": 188000 }, { "FLOPS loss": 0.061119645833969116, "L0_d": 1058.89, "MLM loss": 2.291041374206543, "epoch": 2.09, "step": 188499 }, { "epoch": 2.09, "learning_rate": 6.358775510204083e-05, "loss": 2.3079, "step": 188500 }, { "FLOPS loss": 0.05495110899209976, "L0_d": 744.88, "MLM loss": 2.337536573410034, "epoch": 2.1, "step": 188999 }, { "epoch": 2.1, "learning_rate": 6.34857142857143e-05, "loss": 2.3057, "step": 189000 }, { "FLOPS loss": 0.05809200182557106, "L0_d": 1016.22, "MLM loss": 2.25266170501709, "epoch": 2.1, "step": 189499 }, { "epoch": 2.1, "learning_rate": 6.338367346938776e-05, "loss": 2.3121, "step": 189500 }, { "FLOPS loss": 0.05739111080765724, "L0_d": 816.36, "MLM loss": 2.150939464569092, "epoch": 2.11, "step": 189999 }, { "epoch": 2.11, "learning_rate": 6.328163265306122e-05, "loss": 2.3127, "step": 190000 }, { "FLOPS loss": 0.06043001636862755, "L0_d": 896.67, "MLM loss": 2.1905641555786133, "epoch": 2.11, "step": 190499 }, { "epoch": 2.11, "learning_rate": 6.317979591836735e-05, "loss": 2.311, "step": 190500 }, { "FLOPS loss": 0.03632473200559616, "L0_d": 743.94, "MLM loss": 2.3766369819641113, "epoch": 2.12, "step": 190999 }, { "epoch": 2.12, "learning_rate": 6.307775510204081e-05, "loss": 2.3098, "step": 191000 }, { "FLOPS loss": 0.0524275116622448, "L0_d": 857.64, "MLM loss": 2.168391227722168, "epoch": 2.13, "step": 191499 }, { "epoch": 2.13, "learning_rate": 6.297571428571429e-05, "loss": 2.3129, "step": 191500 }, { "FLOPS loss": 0.05898985266685486, "L0_d": 1079.5, "MLM loss": 2.3129591941833496, "epoch": 2.13, "step": 191999 }, { "epoch": 2.13, "learning_rate": 6.287367346938777e-05, "loss": 2.3092, "step": 192000 }, { "FLOPS loss": 0.067794568836689, "L0_d": 1121.7, "MLM loss": 2.131967067718506, "epoch": 2.14, "step": 192499 }, { "epoch": 2.14, "learning_rate": 6.277163265306123e-05, "loss": 2.3062, "step": 192500 }, { "FLOPS loss": 0.0543605200946331, "L0_d": 971.77, "MLM loss": 2.5032875537872314, "epoch": 2.14, "step": 192999 }, { "epoch": 2.14, "learning_rate": 6.266979591836735e-05, "loss": 2.3098, "step": 193000 }, { "FLOPS loss": 0.0840449333190918, "L0_d": 1311.22, "MLM loss": 2.2395904064178467, "epoch": 2.15, "step": 193499 }, { "epoch": 2.15, "learning_rate": 6.256775510204082e-05, "loss": 2.3067, "step": 193500 }, { "FLOPS loss": 0.05639391764998436, "L0_d": 897.28, "MLM loss": 2.252620220184326, "epoch": 2.15, "step": 193999 }, { "epoch": 2.15, "learning_rate": 6.24657142857143e-05, "loss": 2.3034, "step": 194000 }, { "FLOPS loss": 0.050509653985500336, "L0_d": 826.75, "MLM loss": 2.1555681228637695, "epoch": 2.16, "step": 194499 }, { "epoch": 2.16, "learning_rate": 6.236367346938776e-05, "loss": 2.3048, "step": 194500 }, { "FLOPS loss": 0.043410684913396835, "L0_d": 886.33, "MLM loss": 2.1389873027801514, "epoch": 2.16, "step": 194999 }, { "epoch": 2.16, "learning_rate": 6.226183673469388e-05, "loss": 2.305, "step": 195000 }, { "FLOPS loss": 0.06308072060346603, "L0_d": 1046.8, "MLM loss": 2.2248311042785645, "epoch": 2.17, "step": 195499 }, { "epoch": 2.17, "learning_rate": 6.215979591836735e-05, "loss": 2.3016, "step": 195500 }, { "FLOPS loss": 0.060886088758707047, "L0_d": 881.0, "MLM loss": 2.2729129791259766, "epoch": 2.18, "step": 195999 }, { "epoch": 2.18, "learning_rate": 6.205775510204081e-05, "loss": 2.3037, "step": 196000 }, { "FLOPS loss": 0.04616640508174896, "L0_d": 751.09, "MLM loss": 2.3302903175354004, "epoch": 2.18, "step": 196499 }, { "epoch": 2.18, "learning_rate": 6.195571428571429e-05, "loss": 2.3021, "step": 196500 }, { "FLOPS loss": 0.0533144474029541, "L0_d": 730.53, "MLM loss": 2.3340904712677, "epoch": 2.19, "step": 196999 }, { "epoch": 2.19, "learning_rate": 6.185367346938777e-05, "loss": 2.3, "step": 197000 }, { "FLOPS loss": 0.0651758685708046, "L0_d": 1033.27, "MLM loss": 2.197570562362671, "epoch": 2.19, "step": 197499 }, { "epoch": 2.19, "learning_rate": 6.175183673469387e-05, "loss": 2.3028, "step": 197500 }, { "FLOPS loss": 0.06367243826389313, "L0_d": 1210.45, "MLM loss": 2.0054616928100586, "epoch": 2.2, "step": 197999 }, { "epoch": 2.2, "learning_rate": 6.164979591836735e-05, "loss": 2.3, "step": 198000 }, { "FLOPS loss": 0.049146752804517746, "L0_d": 748.31, "MLM loss": 2.3555009365081787, "epoch": 2.2, "step": 198499 }, { "epoch": 2.2, "learning_rate": 6.154775510204082e-05, "loss": 2.3047, "step": 198500 }, { "FLOPS loss": 0.07421018183231354, "L0_d": 1404.91, "MLM loss": 2.2458107471466064, "epoch": 2.21, "step": 198999 }, { "epoch": 2.21, "learning_rate": 6.144591836734695e-05, "loss": 2.2964, "step": 199000 }, { "FLOPS loss": 0.06392641365528107, "L0_d": 1064.77, "MLM loss": 2.165522575378418, "epoch": 2.21, "step": 199499 }, { "epoch": 2.21, "learning_rate": 6.134387755102042e-05, "loss": 2.2984, "step": 199500 }, { "FLOPS loss": 0.05519779026508331, "L0_d": 761.55, "MLM loss": 2.3119521141052246, "epoch": 2.22, "step": 199999 }, { "epoch": 2.22, "learning_rate": 6.124183673469388e-05, "loss": 2.298, "step": 200000 }, { "FLOPS loss": 0.05917292460799217, "L0_d": 1020.19, "MLM loss": 2.28605318069458, "epoch": 2.23, "step": 200499 }, { "epoch": 2.23, "learning_rate": 6.113979591836734e-05, "loss": 2.3011, "step": 200500 }, { "FLOPS loss": 0.05990422144532204, "L0_d": 1064.44, "MLM loss": 2.1877198219299316, "epoch": 2.23, "step": 200999 }, { "epoch": 2.23, "learning_rate": 6.103775510204082e-05, "loss": 2.2959, "step": 201000 }, { "FLOPS loss": 0.059502195566892624, "L0_d": 1107.78, "MLM loss": 2.247224807739258, "epoch": 2.24, "step": 201499 }, { "epoch": 2.24, "learning_rate": 6.093571428571429e-05, "loss": 2.2975, "step": 201500 }, { "FLOPS loss": 0.04919683188199997, "L0_d": 834.72, "MLM loss": 2.1115541458129883, "epoch": 2.24, "step": 201999 }, { "epoch": 2.24, "learning_rate": 6.083367346938775e-05, "loss": 2.2948, "step": 202000 }, { "FLOPS loss": 0.0608077272772789, "L0_d": 965.47, "MLM loss": 2.1066982746124268, "epoch": 2.25, "step": 202499 }, { "epoch": 2.25, "learning_rate": 6.073163265306123e-05, "loss": 2.2942, "step": 202500 }, { "FLOPS loss": 0.06981346011161804, "L0_d": 1168.09, "MLM loss": 2.2556567192077637, "epoch": 2.25, "step": 202999 }, { "epoch": 2.25, "learning_rate": 6.062979591836735e-05, "loss": 2.2938, "step": 203000 }, { "FLOPS loss": 0.05991167947649956, "L0_d": 1007.27, "MLM loss": 2.290571451187134, "epoch": 2.26, "step": 203499 }, { "epoch": 2.26, "learning_rate": 6.0527755102040816e-05, "loss": 2.2958, "step": 203500 }, { "FLOPS loss": 0.049616504460573196, "L0_d": 788.38, "MLM loss": 2.096006155014038, "epoch": 2.26, "step": 203999 }, { "epoch": 2.26, "learning_rate": 6.042571428571429e-05, "loss": 2.2941, "step": 204000 }, { "FLOPS loss": 0.0658799558877945, "L0_d": 1039.0, "MLM loss": 2.158169746398926, "epoch": 2.27, "step": 204499 }, { "epoch": 2.27, "learning_rate": 6.032367346938775e-05, "loss": 2.2947, "step": 204500 }, { "FLOPS loss": 0.054312895983457565, "L0_d": 934.95, "MLM loss": 2.1070778369903564, "epoch": 2.28, "step": 204999 }, { "epoch": 2.28, "learning_rate": 6.022183673469388e-05, "loss": 2.2912, "step": 205000 }, { "FLOPS loss": 0.05599505454301834, "L0_d": 918.33, "MLM loss": 2.156792163848877, "epoch": 2.28, "step": 205499 }, { "epoch": 2.28, "learning_rate": 6.011979591836735e-05, "loss": 2.2962, "step": 205500 }, { "FLOPS loss": 0.0523684024810791, "L0_d": 815.58, "MLM loss": 2.2655773162841797, "epoch": 2.29, "step": 205999 }, { "epoch": 2.29, "learning_rate": 6.0017755102040816e-05, "loss": 2.2927, "step": 206000 }, { "FLOPS loss": 0.0660998523235321, "L0_d": 971.77, "MLM loss": 2.244384765625, "epoch": 2.29, "step": 206499 }, { "epoch": 2.29, "learning_rate": 5.991571428571429e-05, "loss": 2.2914, "step": 206500 }, { "FLOPS loss": 0.052455466240644455, "L0_d": 970.09, "MLM loss": 2.2161829471588135, "epoch": 2.3, "step": 206999 }, { "epoch": 2.3, "learning_rate": 5.9813877551020415e-05, "loss": 2.2883, "step": 207000 }, { "FLOPS loss": 0.0488029420375824, "L0_d": 827.94, "MLM loss": 2.03910493850708, "epoch": 2.3, "step": 207499 }, { "epoch": 2.3, "learning_rate": 5.971183673469388e-05, "loss": 2.2901, "step": 207500 }, { "FLOPS loss": 0.05921753868460655, "L0_d": 1082.95, "MLM loss": 2.266646385192871, "epoch": 2.31, "step": 207999 }, { "epoch": 2.31, "learning_rate": 5.960979591836735e-05, "loss": 2.2892, "step": 208000 }, { "FLOPS loss": 0.05265677347779274, "L0_d": 854.81, "MLM loss": 2.2230875492095947, "epoch": 2.31, "step": 208499 }, { "epoch": 2.31, "learning_rate": 5.9507755102040816e-05, "loss": 2.2859, "step": 208500 }, { "FLOPS loss": 0.05406523868441582, "L0_d": 829.95, "MLM loss": 2.2082207202911377, "epoch": 2.32, "step": 208999 }, { "epoch": 2.32, "learning_rate": 5.940571428571429e-05, "loss": 2.2897, "step": 209000 }, { "FLOPS loss": 0.050975997000932693, "L0_d": 852.61, "MLM loss": 2.1526894569396973, "epoch": 2.32, "step": 209499 }, { "epoch": 2.32, "learning_rate": 5.9303877551020415e-05, "loss": 2.2847, "step": 209500 }, { "FLOPS loss": 0.05317690223455429, "L0_d": 862.17, "MLM loss": 2.114109516143799, "epoch": 2.33, "step": 209999 }, { "epoch": 2.33, "learning_rate": 5.9201836734693886e-05, "loss": 2.2821, "step": 210000 }, { "FLOPS loss": 0.06353534013032913, "L0_d": 887.44, "MLM loss": 2.165651559829712, "epoch": 2.34, "step": 210499 }, { "epoch": 2.34, "learning_rate": 5.909979591836735e-05, "loss": 2.286, "step": 210500 }, { "FLOPS loss": 0.051628801971673965, "L0_d": 784.48, "MLM loss": 2.2152271270751953, "epoch": 2.34, "step": 210999 }, { "epoch": 2.34, "learning_rate": 5.899775510204082e-05, "loss": 2.2883, "step": 211000 }, { "FLOPS loss": 0.04350633546710014, "L0_d": 773.44, "MLM loss": 2.0546042919158936, "epoch": 2.35, "step": 211499 }, { "epoch": 2.35, "learning_rate": 5.8895918367346936e-05, "loss": 2.2861, "step": 211500 }, { "FLOPS loss": 0.06513429433107376, "L0_d": 1199.59, "MLM loss": 2.132012367248535, "epoch": 2.35, "step": 211999 }, { "epoch": 2.35, "learning_rate": 5.8793877551020414e-05, "loss": 2.2864, "step": 212000 }, { "FLOPS loss": 0.05980905517935753, "L0_d": 844.67, "MLM loss": 2.176804542541504, "epoch": 2.36, "step": 212499 }, { "epoch": 2.36, "learning_rate": 5.8691836734693886e-05, "loss": 2.2882, "step": 212500 }, { "FLOPS loss": 0.0509488582611084, "L0_d": 738.47, "MLM loss": 2.0938873291015625, "epoch": 2.36, "step": 212999 }, { "epoch": 2.36, "learning_rate": 5.858979591836735e-05, "loss": 2.2848, "step": 213000 }, { "FLOPS loss": 0.05314105004072189, "L0_d": 822.69, "MLM loss": 2.2353482246398926, "epoch": 2.37, "step": 213499 }, { "epoch": 2.37, "learning_rate": 5.848775510204082e-05, "loss": 2.2816, "step": 213500 }, { "FLOPS loss": 0.06591460108757019, "L0_d": 958.62, "MLM loss": 2.1384596824645996, "epoch": 2.37, "step": 213999 }, { "epoch": 2.37, "learning_rate": 5.838571428571429e-05, "loss": 2.2821, "step": 214000 }, { "FLOPS loss": 0.061543118208646774, "L0_d": 908.31, "MLM loss": 2.325132369995117, "epoch": 2.38, "step": 214499 }, { "epoch": 2.38, "learning_rate": 5.8283877551020414e-05, "loss": 2.2816, "step": 214500 }, { "FLOPS loss": 0.05648140609264374, "L0_d": 1002.91, "MLM loss": 2.2039623260498047, "epoch": 2.39, "step": 214999 }, { "epoch": 2.39, "learning_rate": 5.8181836734693886e-05, "loss": 2.2863, "step": 215000 }, { "FLOPS loss": 0.05855535715818405, "L0_d": 974.28, "MLM loss": 2.1315572261810303, "epoch": 2.39, "step": 215499 }, { "epoch": 2.39, "learning_rate": 5.807979591836735e-05, "loss": 2.2787, "step": 215500 }, { "FLOPS loss": 0.03791998326778412, "L0_d": 716.45, "MLM loss": 2.3424646854400635, "epoch": 2.4, "step": 215999 }, { "epoch": 2.4, "learning_rate": 5.797775510204082e-05, "loss": 2.2805, "step": 216000 }, { "FLOPS loss": 0.05710968002676964, "L0_d": 932.16, "MLM loss": 2.3624837398529053, "epoch": 2.4, "step": 216499 }, { "epoch": 2.4, "learning_rate": 5.7875918367346936e-05, "loss": 2.2808, "step": 216500 }, { "FLOPS loss": 0.05832459405064583, "L0_d": 923.48, "MLM loss": 2.119412422180176, "epoch": 2.41, "step": 216999 }, { "epoch": 2.41, "learning_rate": 5.777387755102041e-05, "loss": 2.2819, "step": 217000 }, { "FLOPS loss": 0.0697343721985817, "L0_d": 1126.11, "MLM loss": 2.2843456268310547, "epoch": 2.41, "step": 217499 }, { "epoch": 2.41, "learning_rate": 5.7671836734693885e-05, "loss": 2.2811, "step": 217500 }, { "FLOPS loss": 0.0475907027721405, "L0_d": 847.17, "MLM loss": 2.2957966327667236, "epoch": 2.42, "step": 217999 }, { "epoch": 2.42, "learning_rate": 5.756979591836735e-05, "loss": 2.2798, "step": 218000 }, { "FLOPS loss": 0.05427943542599678, "L0_d": 893.11, "MLM loss": 2.029310941696167, "epoch": 2.42, "step": 218499 }, { "epoch": 2.42, "learning_rate": 5.746795918367347e-05, "loss": 2.2821, "step": 218500 }, { "FLOPS loss": 0.04940053075551987, "L0_d": 910.53, "MLM loss": 2.2816390991210938, "epoch": 2.43, "step": 218999 }, { "epoch": 2.43, "learning_rate": 5.7365918367346936e-05, "loss": 2.2788, "step": 219000 }, { "FLOPS loss": 0.04652762785553932, "L0_d": 715.27, "MLM loss": 2.3862500190734863, "epoch": 2.44, "step": 219499 }, { "epoch": 2.44, "learning_rate": 5.726387755102041e-05, "loss": 2.2762, "step": 219500 }, { "FLOPS loss": 0.04910140112042427, "L0_d": 816.67, "MLM loss": 2.2733139991760254, "epoch": 2.44, "step": 219999 }, { "epoch": 2.44, "learning_rate": 5.7161836734693885e-05, "loss": 2.277, "step": 220000 }, { "FLOPS loss": 0.04857784882187843, "L0_d": 890.05, "MLM loss": 2.0154387950897217, "epoch": 2.45, "step": 220499 }, { "epoch": 2.45, "learning_rate": 5.705979591836736e-05, "loss": 2.2729, "step": 220500 }, { "FLOPS loss": 0.04636503383517265, "L0_d": 803.22, "MLM loss": 2.188833713531494, "epoch": 2.45, "step": 220999 }, { "epoch": 2.45, "learning_rate": 5.695795918367347e-05, "loss": 2.2798, "step": 221000 }, { "FLOPS loss": 0.04596555233001709, "L0_d": 948.42, "MLM loss": 2.2551398277282715, "epoch": 2.46, "step": 221499 }, { "epoch": 2.46, "learning_rate": 5.6855918367346935e-05, "loss": 2.2776, "step": 221500 }, { "FLOPS loss": 0.05169941112399101, "L0_d": 849.66, "MLM loss": 2.2161943912506104, "epoch": 2.46, "step": 221999 }, { "epoch": 2.46, "learning_rate": 5.675387755102041e-05, "loss": 2.2806, "step": 222000 }, { "FLOPS loss": 0.05788962170481682, "L0_d": 1237.5, "MLM loss": 2.203382968902588, "epoch": 2.47, "step": 222499 }, { "epoch": 2.47, "learning_rate": 5.665183673469388e-05, "loss": 2.2788, "step": 222500 }, { "FLOPS loss": 0.05764874815940857, "L0_d": 904.06, "MLM loss": 2.3287649154663086, "epoch": 2.47, "step": 222999 }, { "epoch": 2.47, "learning_rate": 5.6550000000000006e-05, "loss": 2.2748, "step": 223000 }, { "FLOPS loss": 0.06359417736530304, "L0_d": 998.28, "MLM loss": 2.344877243041992, "epoch": 2.48, "step": 223499 }, { "epoch": 2.48, "learning_rate": 5.644795918367347e-05, "loss": 2.2685, "step": 223500 }, { "FLOPS loss": 0.06933676451444626, "L0_d": 1077.55, "MLM loss": 2.0771408081054688, "epoch": 2.49, "step": 223999 }, { "epoch": 2.49, "learning_rate": 5.634591836734694e-05, "loss": 2.2707, "step": 224000 }, { "FLOPS loss": 0.06548981368541718, "L0_d": 1103.22, "MLM loss": 2.1917173862457275, "epoch": 2.49, "step": 224499 }, { "epoch": 2.49, "learning_rate": 5.624387755102041e-05, "loss": 2.2733, "step": 224500 }, { "FLOPS loss": 0.05155865475535393, "L0_d": 870.03, "MLM loss": 2.295875310897827, "epoch": 2.5, "step": 224999 }, { "epoch": 2.5, "learning_rate": 5.6142040816326534e-05, "loss": 2.2711, "step": 225000 }, { "FLOPS loss": 0.06172310560941696, "L0_d": 904.8, "MLM loss": 2.318241596221924, "epoch": 2.5, "step": 225499 }, { "epoch": 2.5, "learning_rate": 5.6040000000000006e-05, "loss": 2.273, "step": 225500 }, { "FLOPS loss": 0.050965480506420135, "L0_d": 933.56, "MLM loss": 2.2825512886047363, "epoch": 2.51, "step": 225999 }, { "epoch": 2.51, "learning_rate": 5.593795918367347e-05, "loss": 2.2731, "step": 226000 }, { "FLOPS loss": 0.06011576950550079, "L0_d": 944.72, "MLM loss": 2.1178033351898193, "epoch": 2.51, "step": 226499 }, { "epoch": 2.51, "learning_rate": 5.583591836734694e-05, "loss": 2.2703, "step": 226500 }, { "FLOPS loss": 0.05605456233024597, "L0_d": 966.25, "MLM loss": 2.1820104122161865, "epoch": 2.52, "step": 226999 }, { "epoch": 2.52, "learning_rate": 5.5733877551020406e-05, "loss": 2.2698, "step": 227000 }, { "FLOPS loss": 0.04151715710759163, "L0_d": 772.53, "MLM loss": 2.1365678310394287, "epoch": 2.52, "step": 227499 }, { "epoch": 2.52, "learning_rate": 5.5632040816326534e-05, "loss": 2.2709, "step": 227500 }, { "FLOPS loss": 0.05785823240876198, "L0_d": 853.11, "MLM loss": 2.0644872188568115, "epoch": 2.53, "step": 227999 }, { "epoch": 2.53, "learning_rate": 5.5530000000000005e-05, "loss": 2.2648, "step": 228000 }, { "FLOPS loss": 0.048110056668519974, "L0_d": 815.91, "MLM loss": 2.183323860168457, "epoch": 2.54, "step": 228499 }, { "epoch": 2.54, "learning_rate": 5.542795918367347e-05, "loss": 2.2733, "step": 228500 }, { "FLOPS loss": 0.053788695484399796, "L0_d": 770.12, "MLM loss": 2.216787338256836, "epoch": 2.54, "step": 228999 }, { "epoch": 2.54, "learning_rate": 5.532591836734694e-05, "loss": 2.267, "step": 229000 }, { "FLOPS loss": 0.050683408975601196, "L0_d": 883.25, "MLM loss": 2.205061674118042, "epoch": 2.55, "step": 229499 }, { "epoch": 2.55, "learning_rate": 5.522408163265307e-05, "loss": 2.2706, "step": 229500 }, { "FLOPS loss": 0.045749105513095856, "L0_d": 816.92, "MLM loss": 2.2446858882904053, "epoch": 2.55, "step": 229999 }, { "epoch": 2.55, "learning_rate": 5.5122040816326534e-05, "loss": 2.2698, "step": 230000 }, { "FLOPS loss": 0.05567130073904991, "L0_d": 1260.69, "MLM loss": 2.224703311920166, "epoch": 2.56, "step": 230499 }, { "epoch": 2.56, "learning_rate": 5.5020000000000005e-05, "loss": 2.2667, "step": 230500 }, { "FLOPS loss": 0.04637451469898224, "L0_d": 803.8, "MLM loss": 2.2396719455718994, "epoch": 2.56, "step": 230999 }, { "epoch": 2.56, "learning_rate": 5.491795918367347e-05, "loss": 2.2702, "step": 231000 }, { "FLOPS loss": 0.0658695176243782, "L0_d": 1073.3, "MLM loss": 2.18211030960083, "epoch": 2.57, "step": 231499 }, { "epoch": 2.57, "learning_rate": 5.481612244897959e-05, "loss": 2.2645, "step": 231500 }, { "FLOPS loss": 0.053279343992471695, "L0_d": 873.7, "MLM loss": 2.289722204208374, "epoch": 2.57, "step": 231999 }, { "epoch": 2.57, "learning_rate": 5.471408163265307e-05, "loss": 2.2686, "step": 232000 }, { "FLOPS loss": 0.036904674023389816, "L0_d": 687.97, "MLM loss": 2.1473941802978516, "epoch": 2.58, "step": 232499 }, { "epoch": 2.58, "learning_rate": 5.461204081632654e-05, "loss": 2.2658, "step": 232500 }, { "FLOPS loss": 0.07651033997535706, "L0_d": 1116.55, "MLM loss": 2.035149335861206, "epoch": 2.59, "step": 232999 }, { "epoch": 2.59, "learning_rate": 5.4510000000000005e-05, "loss": 2.2609, "step": 233000 }, { "FLOPS loss": 0.06056662276387215, "L0_d": 953.69, "MLM loss": 2.2792515754699707, "epoch": 2.59, "step": 233499 }, { "epoch": 2.59, "learning_rate": 5.4408163265306126e-05, "loss": 2.2645, "step": 233500 }, { "FLOPS loss": 0.06707636266946793, "L0_d": 1002.47, "MLM loss": 2.31508731842041, "epoch": 2.6, "step": 233999 }, { "epoch": 2.6, "learning_rate": 5.430612244897959e-05, "loss": 2.2614, "step": 234000 }, { "FLOPS loss": 0.050100281834602356, "L0_d": 825.02, "MLM loss": 2.273235321044922, "epoch": 2.6, "step": 234499 }, { "epoch": 2.6, "learning_rate": 5.420408163265306e-05, "loss": 2.2595, "step": 234500 }, { "FLOPS loss": 0.05122532695531845, "L0_d": 734.77, "MLM loss": 2.2624785900115967, "epoch": 2.61, "step": 234999 }, { "epoch": 2.61, "learning_rate": 5.410204081632654e-05, "loss": 2.2632, "step": 235000 }, { "FLOPS loss": 0.048747166991233826, "L0_d": 785.45, "MLM loss": 2.2363336086273193, "epoch": 2.61, "step": 235499 }, { "epoch": 2.61, "learning_rate": 5.4000204081632654e-05, "loss": 2.2631, "step": 235500 }, { "FLOPS loss": 0.06120562553405762, "L0_d": 921.34, "MLM loss": 2.2905240058898926, "epoch": 2.62, "step": 235999 }, { "epoch": 2.62, "learning_rate": 5.3898163265306125e-05, "loss": 2.2639, "step": 236000 }, { "FLOPS loss": 0.056542761623859406, "L0_d": 910.56, "MLM loss": 2.2068281173706055, "epoch": 2.62, "step": 236499 }, { "epoch": 2.62, "learning_rate": 5.379612244897959e-05, "loss": 2.259, "step": 236500 }, { "FLOPS loss": 0.035393137484788895, "L0_d": 635.03, "MLM loss": 2.2075304985046387, "epoch": 2.63, "step": 236999 }, { "epoch": 2.63, "learning_rate": 5.369408163265306e-05, "loss": 2.2612, "step": 237000 }, { "FLOPS loss": 0.05154719203710556, "L0_d": 743.83, "MLM loss": 2.337475538253784, "epoch": 2.64, "step": 237499 }, { "epoch": 2.64, "learning_rate": 5.359224489795919e-05, "loss": 2.2585, "step": 237500 }, { "FLOPS loss": 0.05083218961954117, "L0_d": 709.02, "MLM loss": 2.0901761054992676, "epoch": 2.64, "step": 237999 }, { "epoch": 2.64, "learning_rate": 5.3490204081632654e-05, "loss": 2.2542, "step": 238000 }, { "FLOPS loss": 0.04526914283633232, "L0_d": 806.97, "MLM loss": 2.1109421253204346, "epoch": 2.65, "step": 238499 }, { "epoch": 2.65, "learning_rate": 5.3388163265306125e-05, "loss": 2.2575, "step": 238500 }, { "FLOPS loss": 0.06267785280942917, "L0_d": 1086.52, "MLM loss": 2.280970811843872, "epoch": 2.65, "step": 238999 }, { "epoch": 2.65, "learning_rate": 5.328612244897959e-05, "loss": 2.2581, "step": 239000 }, { "FLOPS loss": 0.0569884330034256, "L0_d": 991.69, "MLM loss": 2.1320345401763916, "epoch": 2.66, "step": 239499 }, { "epoch": 2.66, "learning_rate": 5.318428571428572e-05, "loss": 2.2563, "step": 239500 }, { "FLOPS loss": 0.05082286149263382, "L0_d": 841.41, "MLM loss": 2.3559324741363525, "epoch": 2.66, "step": 239999 }, { "epoch": 2.66, "learning_rate": 5.308224489795919e-05, "loss": 2.2577, "step": 240000 }, { "FLOPS loss": 0.050658032298088074, "L0_d": 1080.58, "MLM loss": 2.1967451572418213, "epoch": 2.67, "step": 240499 }, { "epoch": 2.67, "learning_rate": 5.2980204081632654e-05, "loss": 2.2539, "step": 240500 }, { "FLOPS loss": 0.056913554668426514, "L0_d": 1105.84, "MLM loss": 2.2768471240997314, "epoch": 2.67, "step": 240999 }, { "epoch": 2.67, "learning_rate": 5.2878163265306125e-05, "loss": 2.2568, "step": 241000 }, { "FLOPS loss": 0.04149184376001358, "L0_d": 758.23, "MLM loss": 2.044034004211426, "epoch": 2.68, "step": 241499 }, { "epoch": 2.68, "learning_rate": 5.277612244897959e-05, "loss": 2.2557, "step": 241500 }, { "FLOPS loss": 0.04272041842341423, "L0_d": 947.69, "MLM loss": 2.170309066772461, "epoch": 2.69, "step": 241999 }, { "epoch": 2.69, "learning_rate": 5.267428571428572e-05, "loss": 2.2559, "step": 242000 }, { "FLOPS loss": 0.05496696010231972, "L0_d": 968.5, "MLM loss": 2.16430926322937, "epoch": 2.69, "step": 242499 }, { "epoch": 2.69, "learning_rate": 5.257224489795919e-05, "loss": 2.2558, "step": 242500 }, { "FLOPS loss": 0.06393209099769592, "L0_d": 1080.25, "MLM loss": 2.135136127471924, "epoch": 2.7, "step": 242999 }, { "epoch": 2.7, "learning_rate": 5.247020408163266e-05, "loss": 2.2588, "step": 243000 }, { "FLOPS loss": 0.05491110682487488, "L0_d": 915.09, "MLM loss": 2.300963878631592, "epoch": 2.7, "step": 243499 }, { "epoch": 2.7, "learning_rate": 5.2368163265306125e-05, "loss": 2.2518, "step": 243500 }, { "FLOPS loss": 0.038062237203121185, "L0_d": 709.52, "MLM loss": 2.2721335887908936, "epoch": 2.71, "step": 243999 }, { "epoch": 2.71, "learning_rate": 5.226632653061225e-05, "loss": 2.2572, "step": 244000 }, { "FLOPS loss": 0.05400152504444122, "L0_d": 825.84, "MLM loss": 2.115170955657959, "epoch": 2.71, "step": 244499 }, { "epoch": 2.71, "learning_rate": 5.216448979591837e-05, "loss": 2.2543, "step": 244500 }, { "FLOPS loss": 0.05306870490312576, "L0_d": 790.2, "MLM loss": 2.1474010944366455, "epoch": 2.72, "step": 244999 }, { "epoch": 2.72, "learning_rate": 5.206244897959184e-05, "loss": 2.2564, "step": 245000 }, { "FLOPS loss": 0.04991094395518303, "L0_d": 916.58, "MLM loss": 2.2158572673797607, "epoch": 2.72, "step": 245499 }, { "epoch": 2.72, "learning_rate": 5.196040816326531e-05, "loss": 2.2554, "step": 245500 }, { "FLOPS loss": 0.06286051124334335, "L0_d": 873.25, "MLM loss": 2.224592447280884, "epoch": 2.73, "step": 245999 }, { "epoch": 2.73, "learning_rate": 5.1858367346938774e-05, "loss": 2.255, "step": 246000 }, { "FLOPS loss": 0.05380546674132347, "L0_d": 765.97, "MLM loss": 2.056410551071167, "epoch": 2.74, "step": 246499 }, { "epoch": 2.74, "learning_rate": 5.1756326530612245e-05, "loss": 2.2561, "step": 246500 }, { "FLOPS loss": 0.06842859834432602, "L0_d": 1109.06, "MLM loss": 2.1177592277526855, "epoch": 2.74, "step": 246999 }, { "epoch": 2.74, "learning_rate": 5.1654285714285724e-05, "loss": 2.2504, "step": 247000 }, { "FLOPS loss": 0.05946075916290283, "L0_d": 987.23, "MLM loss": 2.3177003860473633, "epoch": 2.75, "step": 247499 }, { "epoch": 2.75, "learning_rate": 5.155224489795919e-05, "loss": 2.2494, "step": 247500 }, { "FLOPS loss": 0.044885631650686264, "L0_d": 810.19, "MLM loss": 2.3376481533050537, "epoch": 2.75, "step": 247999 }, { "epoch": 2.75, "learning_rate": 5.145020408163266e-05, "loss": 2.2487, "step": 248000 }, { "FLOPS loss": 0.06399539858102798, "L0_d": 1071.72, "MLM loss": 2.2000818252563477, "epoch": 2.76, "step": 248499 }, { "epoch": 2.76, "learning_rate": 5.1348367346938774e-05, "loss": 2.2487, "step": 248500 }, { "FLOPS loss": 0.06629626452922821, "L0_d": 1149.53, "MLM loss": 2.106959581375122, "epoch": 2.76, "step": 248999 }, { "epoch": 2.76, "learning_rate": 5.1246326530612245e-05, "loss": 2.2484, "step": 249000 }, { "FLOPS loss": 0.04863587021827698, "L0_d": 842.42, "MLM loss": 2.114797353744507, "epoch": 2.77, "step": 249499 }, { "epoch": 2.77, "learning_rate": 5.114428571428571e-05, "loss": 2.2483, "step": 249500 }, { "FLOPS loss": 0.06066122278571129, "L0_d": 1027.81, "MLM loss": 2.0860722064971924, "epoch": 2.77, "step": 249999 }, { "epoch": 2.77, "learning_rate": 5.104224489795919e-05, "loss": 2.245, "step": 250000 }, { "FLOPS loss": 0.04778517782688141, "L0_d": 757.7, "MLM loss": 2.323871612548828, "epoch": 2.78, "step": 250499 }, { "epoch": 2.78, "learning_rate": 5.094040816326531e-05, "loss": 2.2475, "step": 250500 }, { "FLOPS loss": 0.05343439057469368, "L0_d": 827.64, "MLM loss": 2.159205436706543, "epoch": 2.79, "step": 250999 }, { "epoch": 2.79, "learning_rate": 5.0838367346938773e-05, "loss": 2.2527, "step": 251000 }, { "FLOPS loss": 0.05928456783294678, "L0_d": 1000.28, "MLM loss": 2.162782907485962, "epoch": 2.79, "step": 251499 }, { "epoch": 2.79, "learning_rate": 5.0736326530612245e-05, "loss": 2.2528, "step": 251500 }, { "FLOPS loss": 0.06347516924142838, "L0_d": 1157.11, "MLM loss": 2.182072877883911, "epoch": 2.8, "step": 251999 }, { "epoch": 2.8, "learning_rate": 5.063428571428571e-05, "loss": 2.2464, "step": 252000 }, { "FLOPS loss": 0.07632464170455933, "L0_d": 1599.62, "MLM loss": 2.2566537857055664, "epoch": 2.8, "step": 252499 }, { "epoch": 2.8, "learning_rate": 5.053224489795919e-05, "loss": 2.2467, "step": 252500 }, { "FLOPS loss": 0.045484162867069244, "L0_d": 830.86, "MLM loss": 1.989594578742981, "epoch": 2.81, "step": 252999 }, { "epoch": 2.81, "learning_rate": 5.043040816326531e-05, "loss": 2.2465, "step": 253000 }, { "FLOPS loss": 0.04404137283563614, "L0_d": 776.55, "MLM loss": 2.1720945835113525, "epoch": 2.81, "step": 253499 }, { "epoch": 2.81, "learning_rate": 5.032836734693877e-05, "loss": 2.2456, "step": 253500 }, { "FLOPS loss": 0.05956972390413284, "L0_d": 944.78, "MLM loss": 2.158615827560425, "epoch": 2.82, "step": 253999 }, { "epoch": 2.82, "learning_rate": 5.0226326530612245e-05, "loss": 2.2457, "step": 254000 }, { "FLOPS loss": 0.0445852093398571, "L0_d": 724.98, "MLM loss": 2.082374334335327, "epoch": 2.82, "step": 254499 }, { "epoch": 2.82, "learning_rate": 5.012448979591837e-05, "loss": 2.2453, "step": 254500 }, { "FLOPS loss": 0.06759438663721085, "L0_d": 1082.47, "MLM loss": 2.101926803588867, "epoch": 2.83, "step": 254999 }, { "epoch": 2.83, "learning_rate": 5.0022448979591844e-05, "loss": 2.2458, "step": 255000 }, { "FLOPS loss": 0.05499878525733948, "L0_d": 883.91, "MLM loss": 2.187554121017456, "epoch": 2.84, "step": 255499 }, { "epoch": 2.84, "learning_rate": 4.992040816326531e-05, "loss": 2.2416, "step": 255500 }, { "FLOPS loss": 0.039610929787158966, "L0_d": 713.52, "MLM loss": 2.199946403503418, "epoch": 2.84, "step": 255999 }, { "epoch": 2.84, "learning_rate": 4.981836734693878e-05, "loss": 2.2427, "step": 256000 }, { "FLOPS loss": 0.04497678577899933, "L0_d": 804.88, "MLM loss": 2.088580846786499, "epoch": 2.85, "step": 256499 }, { "epoch": 2.85, "learning_rate": 4.9716326530612245e-05, "loss": 2.2456, "step": 256500 }, { "FLOPS loss": 0.05698969587683678, "L0_d": 844.95, "MLM loss": 2.185488700866699, "epoch": 2.85, "step": 256999 }, { "epoch": 2.85, "learning_rate": 4.9614285714285716e-05, "loss": 2.2443, "step": 257000 }, { "FLOPS loss": 0.07459152489900589, "L0_d": 988.91, "MLM loss": 2.0591201782226562, "epoch": 2.86, "step": 257499 }, { "epoch": 2.86, "learning_rate": 4.951224489795919e-05, "loss": 2.2418, "step": 257500 }, { "FLOPS loss": 0.04065524786710739, "L0_d": 752.0, "MLM loss": 2.1920008659362793, "epoch": 2.86, "step": 257999 }, { "epoch": 2.86, "learning_rate": 4.941020408163265e-05, "loss": 2.2432, "step": 258000 }, { "FLOPS loss": 0.05703974515199661, "L0_d": 858.95, "MLM loss": 1.997971773147583, "epoch": 2.87, "step": 258499 }, { "epoch": 2.87, "learning_rate": 4.930836734693878e-05, "loss": 2.2391, "step": 258500 }, { "FLOPS loss": 0.06548793613910675, "L0_d": 1015.34, "MLM loss": 2.318230152130127, "epoch": 2.87, "step": 258999 }, { "epoch": 2.87, "learning_rate": 4.9206326530612244e-05, "loss": 2.2437, "step": 259000 }, { "FLOPS loss": 0.06613794714212418, "L0_d": 973.38, "MLM loss": 2.329318046569824, "epoch": 2.88, "step": 259499 }, { "epoch": 2.88, "learning_rate": 4.910428571428572e-05, "loss": 2.2403, "step": 259500 }, { "FLOPS loss": 0.056171733886003494, "L0_d": 936.62, "MLM loss": 2.1623589992523193, "epoch": 2.89, "step": 259999 }, { "epoch": 2.89, "learning_rate": 4.900224489795919e-05, "loss": 2.2408, "step": 260000 }, { "FLOPS loss": 0.0532575361430645, "L0_d": 818.92, "MLM loss": 2.2925689220428467, "epoch": 2.89, "step": 260499 }, { "epoch": 2.89, "learning_rate": 4.890040816326531e-05, "loss": 2.2377, "step": 260500 }, { "FLOPS loss": 0.05445598438382149, "L0_d": 792.17, "MLM loss": 1.991426706314087, "epoch": 2.9, "step": 260999 }, { "epoch": 2.9, "learning_rate": 4.879836734693878e-05, "loss": 2.243, "step": 261000 }, { "FLOPS loss": 0.04531127214431763, "L0_d": 778.75, "MLM loss": 2.2480430603027344, "epoch": 2.9, "step": 261499 }, { "epoch": 2.9, "learning_rate": 4.86965306122449e-05, "loss": 2.2386, "step": 261500 }, { "FLOPS loss": 0.04863082617521286, "L0_d": 886.45, "MLM loss": 2.147947072982788, "epoch": 2.91, "step": 261999 }, { "epoch": 2.91, "learning_rate": 4.859448979591837e-05, "loss": 2.2383, "step": 262000 }, { "FLOPS loss": 0.060139250010252, "L0_d": 1008.83, "MLM loss": 2.190028667449951, "epoch": 2.91, "step": 262499 }, { "epoch": 2.91, "learning_rate": 4.8492448979591836e-05, "loss": 2.2429, "step": 262500 }, { "FLOPS loss": 0.062470175325870514, "L0_d": 1249.94, "MLM loss": 2.234449625015259, "epoch": 2.92, "step": 262999 }, { "epoch": 2.92, "learning_rate": 4.839040816326531e-05, "loss": 2.24, "step": 263000 }, { "FLOPS loss": 0.05068064481019974, "L0_d": 1002.88, "MLM loss": 2.2005953788757324, "epoch": 2.92, "step": 263499 }, { "epoch": 2.92, "learning_rate": 4.828836734693878e-05, "loss": 2.2402, "step": 263500 }, { "FLOPS loss": 0.05311071500182152, "L0_d": 1151.12, "MLM loss": 2.1814534664154053, "epoch": 2.93, "step": 263999 }, { "epoch": 2.93, "learning_rate": 4.8186326530612244e-05, "loss": 2.242, "step": 264000 }, { "FLOPS loss": 0.05881292745471001, "L0_d": 1264.55, "MLM loss": 2.2747695446014404, "epoch": 2.94, "step": 264499 }, { "epoch": 2.94, "learning_rate": 4.8084285714285715e-05, "loss": 2.2415, "step": 264500 }, { "FLOPS loss": 0.05623582750558853, "L0_d": 836.94, "MLM loss": 2.186206579208374, "epoch": 2.94, "step": 264999 }, { "epoch": 2.94, "learning_rate": 4.798224489795918e-05, "loss": 2.2393, "step": 265000 }, { "FLOPS loss": 0.044544193893671036, "L0_d": 652.16, "MLM loss": 2.3120899200439453, "epoch": 2.95, "step": 265499 }, { "epoch": 2.95, "learning_rate": 4.788040816326531e-05, "loss": 2.2359, "step": 265500 }, { "FLOPS loss": 0.049939434975385666, "L0_d": 926.88, "MLM loss": 2.229126214981079, "epoch": 2.95, "step": 265999 }, { "epoch": 2.95, "learning_rate": 4.777836734693878e-05, "loss": 2.2411, "step": 266000 }, { "FLOPS loss": 0.04950281232595444, "L0_d": 928.11, "MLM loss": 2.208569049835205, "epoch": 2.96, "step": 266499 }, { "epoch": 2.96, "learning_rate": 4.76765306122449e-05, "loss": 2.2339, "step": 266500 }, { "FLOPS loss": 0.04743512347340584, "L0_d": 791.44, "MLM loss": 2.420844316482544, "epoch": 2.96, "step": 266999 }, { "epoch": 2.96, "learning_rate": 4.757448979591837e-05, "loss": 2.2409, "step": 267000 }, { "FLOPS loss": 0.04562266916036606, "L0_d": 617.0, "MLM loss": 2.180669069290161, "epoch": 2.97, "step": 267499 }, { "epoch": 2.97, "learning_rate": 4.7472448979591836e-05, "loss": 2.2358, "step": 267500 }, { "FLOPS loss": 0.056366074830293655, "L0_d": 1087.72, "MLM loss": 2.123141288757324, "epoch": 2.97, "step": 267999 }, { "epoch": 2.97, "learning_rate": 4.737040816326531e-05, "loss": 2.2351, "step": 268000 }, { "FLOPS loss": 0.057932086288928986, "L0_d": 910.19, "MLM loss": 2.184842824935913, "epoch": 2.98, "step": 268499 }, { "epoch": 2.98, "learning_rate": 4.726836734693878e-05, "loss": 2.2355, "step": 268500 }, { "FLOPS loss": 0.041290223598480225, "L0_d": 695.25, "MLM loss": 2.289884328842163, "epoch": 2.99, "step": 268999 }, { "epoch": 2.99, "learning_rate": 4.716632653061225e-05, "loss": 2.2326, "step": 269000 }, { "FLOPS loss": 0.05147482827305794, "L0_d": 926.08, "MLM loss": 2.114023447036743, "epoch": 2.99, "step": 269499 }, { "epoch": 2.99, "learning_rate": 4.706448979591837e-05, "loss": 2.2358, "step": 269500 }, { "FLOPS loss": 0.06018621101975441, "L0_d": 947.72, "MLM loss": 2.1869916915893555, "epoch": 3.0, "step": 269999 }, { "epoch": 3.0, "learning_rate": 4.6962448979591836e-05, "loss": 2.2327, "step": 270000 }, { "FLOPS loss": 0.046223901212215424, "L0_d": 983.28, "MLM loss": 2.0809412002563477, "epoch": 3.0, "step": 270499 }, { "epoch": 3.0, "learning_rate": 4.686040816326531e-05, "loss": 2.232, "step": 270500 }, { "FLOPS loss": 0.052107073366642, "L0_d": 725.77, "MLM loss": 2.2141494750976562, "epoch": 3.01, "step": 270999 }, { "epoch": 3.01, "learning_rate": 4.675836734693877e-05, "loss": 2.2329, "step": 271000 }, { "FLOPS loss": 0.06347242742776871, "L0_d": 1224.59, "MLM loss": 2.2222373485565186, "epoch": 3.01, "step": 271499 }, { "epoch": 3.01, "learning_rate": 4.665632653061225e-05, "loss": 2.2292, "step": 271500 }, { "FLOPS loss": 0.06285754591226578, "L0_d": 954.28, "MLM loss": 2.084881544113159, "epoch": 3.02, "step": 271999 }, { "epoch": 3.02, "learning_rate": 4.6554285714285715e-05, "loss": 2.2329, "step": 272000 }, { "FLOPS loss": 0.05175292119383812, "L0_d": 900.11, "MLM loss": 2.1916990280151367, "epoch": 3.02, "step": 272499 }, { "epoch": 3.02, "learning_rate": 4.6452244897959186e-05, "loss": 2.2264, "step": 272500 }, { "FLOPS loss": 0.05284376069903374, "L0_d": 1039.89, "MLM loss": 2.008911371231079, "epoch": 3.03, "step": 272999 }, { "epoch": 3.03, "learning_rate": 4.635020408163265e-05, "loss": 2.2299, "step": 273000 }, { "FLOPS loss": 0.05500178039073944, "L0_d": 758.98, "MLM loss": 2.292144775390625, "epoch": 3.04, "step": 273499 }, { "epoch": 3.04, "learning_rate": 4.624836734693878e-05, "loss": 2.2293, "step": 273500 }, { "FLOPS loss": 0.053113482892513275, "L0_d": 792.38, "MLM loss": 2.0832297801971436, "epoch": 3.04, "step": 273999 }, { "epoch": 3.04, "learning_rate": 4.614632653061225e-05, "loss": 2.2291, "step": 274000 }, { "FLOPS loss": 0.06035423278808594, "L0_d": 830.22, "MLM loss": 2.1706573963165283, "epoch": 3.05, "step": 274499 }, { "epoch": 3.05, "learning_rate": 4.6044285714285715e-05, "loss": 2.2263, "step": 274500 }, { "FLOPS loss": 0.05119289457798004, "L0_d": 1076.17, "MLM loss": 2.233741283416748, "epoch": 3.05, "step": 274999 }, { "epoch": 3.05, "learning_rate": 4.5942244897959186e-05, "loss": 2.2324, "step": 275000 }, { "FLOPS loss": 0.059192344546318054, "L0_d": 896.91, "MLM loss": 2.2112321853637695, "epoch": 3.06, "step": 275499 }, { "epoch": 3.06, "learning_rate": 4.584040816326531e-05, "loss": 2.2328, "step": 275500 }, { "FLOPS loss": 0.060568492859601974, "L0_d": 1074.77, "MLM loss": 2.052946090698242, "epoch": 3.06, "step": 275999 }, { "epoch": 3.06, "learning_rate": 4.573836734693878e-05, "loss": 2.2292, "step": 276000 }, { "FLOPS loss": 0.05105860158801079, "L0_d": 802.83, "MLM loss": 2.2252485752105713, "epoch": 3.07, "step": 276499 }, { "epoch": 3.07, "learning_rate": 4.563632653061224e-05, "loss": 2.2271, "step": 276500 }, { "FLOPS loss": 0.06822168081998825, "L0_d": 1082.59, "MLM loss": 2.109766721725464, "epoch": 3.07, "step": 276999 }, { "epoch": 3.07, "learning_rate": 4.553428571428572e-05, "loss": 2.2271, "step": 277000 }, { "FLOPS loss": 0.052984897047281265, "L0_d": 827.16, "MLM loss": 2.2336745262145996, "epoch": 3.08, "step": 277499 }, { "epoch": 3.08, "learning_rate": 4.543244897959184e-05, "loss": 2.2244, "step": 277500 }, { "FLOPS loss": 0.048153866082429886, "L0_d": 860.14, "MLM loss": 2.052936553955078, "epoch": 3.09, "step": 277999 }, { "epoch": 3.09, "learning_rate": 4.5330408163265307e-05, "loss": 2.2206, "step": 278000 }, { "FLOPS loss": 0.043998897075653076, "L0_d": 816.27, "MLM loss": 2.125237464904785, "epoch": 3.09, "step": 278499 }, { "epoch": 3.09, "learning_rate": 4.522836734693878e-05, "loss": 2.2251, "step": 278500 }, { "FLOPS loss": 0.042622800916433334, "L0_d": 789.81, "MLM loss": 2.1155881881713867, "epoch": 3.1, "step": 278999 }, { "epoch": 3.1, "learning_rate": 4.512632653061224e-05, "loss": 2.2257, "step": 279000 }, { "FLOPS loss": 0.06448719650506973, "L0_d": 1148.72, "MLM loss": 2.3489694595336914, "epoch": 3.1, "step": 279499 }, { "epoch": 3.1, "learning_rate": 4.502428571428572e-05, "loss": 2.226, "step": 279500 }, { "FLOPS loss": 0.04587463289499283, "L0_d": 864.86, "MLM loss": 2.1391663551330566, "epoch": 3.11, "step": 279999 }, { "epoch": 3.11, "learning_rate": 4.492244897959184e-05, "loss": 2.2278, "step": 280000 }, { "FLOPS loss": 0.06704455614089966, "L0_d": 1058.12, "MLM loss": 2.1483206748962402, "epoch": 3.11, "step": 280499 }, { "epoch": 3.11, "learning_rate": 4.4820408163265306e-05, "loss": 2.2223, "step": 280500 }, { "FLOPS loss": 0.058364126831293106, "L0_d": 876.44, "MLM loss": 2.360599994659424, "epoch": 3.12, "step": 280999 }, { "epoch": 3.12, "learning_rate": 4.471836734693878e-05, "loss": 2.2263, "step": 281000 }, { "FLOPS loss": 0.056107163429260254, "L0_d": 1001.11, "MLM loss": 2.1112656593322754, "epoch": 3.12, "step": 281499 }, { "epoch": 3.12, "learning_rate": 4.46165306122449e-05, "loss": 2.2223, "step": 281500 }, { "FLOPS loss": 0.042310819029808044, "L0_d": 734.28, "MLM loss": 2.1643552780151367, "epoch": 3.13, "step": 281999 }, { "epoch": 3.13, "learning_rate": 4.451448979591837e-05, "loss": 2.2219, "step": 282000 }, { "FLOPS loss": 0.057060495018959045, "L0_d": 986.69, "MLM loss": 2.1081511974334717, "epoch": 3.14, "step": 282499 }, { "epoch": 3.14, "learning_rate": 4.4412448979591835e-05, "loss": 2.2158, "step": 282500 }, { "FLOPS loss": 0.0571078397333622, "L0_d": 803.59, "MLM loss": 2.0553886890411377, "epoch": 3.14, "step": 282999 }, { "epoch": 3.14, "learning_rate": 4.431040816326531e-05, "loss": 2.2207, "step": 283000 }, { "FLOPS loss": 0.07472234219312668, "L0_d": 1185.14, "MLM loss": 2.0385375022888184, "epoch": 3.15, "step": 283499 }, { "epoch": 3.15, "learning_rate": 4.420836734693878e-05, "loss": 2.2243, "step": 283500 }, { "FLOPS loss": 0.055899813771247864, "L0_d": 788.62, "MLM loss": 2.1567277908325195, "epoch": 3.15, "step": 283999 }, { "epoch": 3.15, "learning_rate": 4.410632653061225e-05, "loss": 2.2196, "step": 284000 }, { "FLOPS loss": 0.05922339856624603, "L0_d": 879.62, "MLM loss": 2.0481672286987305, "epoch": 3.16, "step": 284499 }, { "epoch": 3.16, "learning_rate": 4.4004285714285714e-05, "loss": 2.2253, "step": 284500 }, { "FLOPS loss": 0.05354371666908264, "L0_d": 918.16, "MLM loss": 2.2259509563446045, "epoch": 3.16, "step": 284999 }, { "epoch": 3.16, "learning_rate": 4.3902244897959185e-05, "loss": 2.2195, "step": 285000 }, { "FLOPS loss": 0.04211512207984924, "L0_d": 651.16, "MLM loss": 2.279510974884033, "epoch": 3.17, "step": 285499 }, { "epoch": 3.17, "learning_rate": 4.380040816326531e-05, "loss": 2.2193, "step": 285500 }, { "FLOPS loss": 0.048691991716623306, "L0_d": 847.05, "MLM loss": 2.1121108531951904, "epoch": 3.17, "step": 285999 }, { "epoch": 3.17, "learning_rate": 4.369836734693878e-05, "loss": 2.2163, "step": 286000 }, { "FLOPS loss": 0.0590415894985199, "L0_d": 1019.86, "MLM loss": 2.060150146484375, "epoch": 3.18, "step": 286499 }, { "epoch": 3.18, "learning_rate": 4.359632653061225e-05, "loss": 2.2174, "step": 286500 }, { "FLOPS loss": 0.053612228482961655, "L0_d": 796.62, "MLM loss": 2.1316020488739014, "epoch": 3.19, "step": 286999 }, { "epoch": 3.19, "learning_rate": 4.3494285714285714e-05, "loss": 2.2194, "step": 287000 }, { "FLOPS loss": 0.05339399352669716, "L0_d": 923.47, "MLM loss": 2.359421491622925, "epoch": 3.19, "step": 287499 }, { "epoch": 3.19, "learning_rate": 4.3392448979591834e-05, "loss": 2.2181, "step": 287500 }, { "FLOPS loss": 0.04001978412270546, "L0_d": 713.88, "MLM loss": 2.0679988861083984, "epoch": 3.2, "step": 287999 }, { "epoch": 3.2, "learning_rate": 4.3290408163265306e-05, "loss": 2.2199, "step": 288000 }, { "FLOPS loss": 0.06227588281035423, "L0_d": 822.53, "MLM loss": 2.1163511276245117, "epoch": 3.2, "step": 288499 }, { "epoch": 3.2, "learning_rate": 4.318836734693878e-05, "loss": 2.2199, "step": 288500 }, { "FLOPS loss": 0.043540503829717636, "L0_d": 799.95, "MLM loss": 2.185105800628662, "epoch": 3.21, "step": 288999 }, { "epoch": 3.21, "learning_rate": 4.308632653061225e-05, "loss": 2.2171, "step": 289000 }, { "FLOPS loss": 0.062005698680877686, "L0_d": 1007.66, "MLM loss": 2.219142198562622, "epoch": 3.21, "step": 289499 }, { "epoch": 3.21, "learning_rate": 4.2984285714285713e-05, "loss": 2.2161, "step": 289500 }, { "FLOPS loss": 0.04948747903108597, "L0_d": 833.72, "MLM loss": 2.0815012454986572, "epoch": 3.22, "step": 289999 }, { "epoch": 3.22, "learning_rate": 4.288244897959184e-05, "loss": 2.2181, "step": 290000 }, { "FLOPS loss": 0.05977959930896759, "L0_d": 909.45, "MLM loss": 2.309501886367798, "epoch": 3.22, "step": 290499 }, { "epoch": 3.22, "learning_rate": 4.2780408163265306e-05, "loss": 2.2152, "step": 290500 }, { "FLOPS loss": 0.05414106324315071, "L0_d": 981.75, "MLM loss": 2.0887105464935303, "epoch": 3.23, "step": 290999 }, { "epoch": 3.23, "learning_rate": 4.267836734693878e-05, "loss": 2.2216, "step": 291000 }, { "FLOPS loss": 0.03959670290350914, "L0_d": 670.84, "MLM loss": 2.2173049449920654, "epoch": 3.23, "step": 291499 }, { "epoch": 3.24, "learning_rate": 4.257632653061225e-05, "loss": 2.215, "step": 291500 }, { "FLOPS loss": 0.04575555771589279, "L0_d": 817.42, "MLM loss": 2.1522772312164307, "epoch": 3.24, "step": 291999 }, { "epoch": 3.24, "learning_rate": 4.247448979591837e-05, "loss": 2.2134, "step": 292000 }, { "FLOPS loss": 0.042405616492033005, "L0_d": 780.3, "MLM loss": 2.1792874336242676, "epoch": 3.25, "step": 292499 }, { "epoch": 3.25, "learning_rate": 4.237244897959184e-05, "loss": 2.2135, "step": 292500 }, { "FLOPS loss": 0.04885968193411827, "L0_d": 948.97, "MLM loss": 2.3135223388671875, "epoch": 3.25, "step": 292999 }, { "epoch": 3.25, "learning_rate": 4.2270408163265305e-05, "loss": 2.2152, "step": 293000 }, { "FLOPS loss": 0.055942602455616, "L0_d": 912.14, "MLM loss": 2.2848806381225586, "epoch": 3.26, "step": 293499 }, { "epoch": 3.26, "learning_rate": 4.216857142857143e-05, "loss": 2.218, "step": 293500 }, { "FLOPS loss": 0.04596254602074623, "L0_d": 781.91, "MLM loss": 2.004021406173706, "epoch": 3.26, "step": 293999 }, { "epoch": 3.26, "learning_rate": 4.20665306122449e-05, "loss": 2.2123, "step": 294000 }, { "FLOPS loss": 0.06211161985993385, "L0_d": 1022.8, "MLM loss": 1.992515206336975, "epoch": 3.27, "step": 294499 }, { "epoch": 3.27, "learning_rate": 4.196448979591837e-05, "loss": 2.2159, "step": 294500 }, { "FLOPS loss": 0.05643187090754509, "L0_d": 855.0, "MLM loss": 2.1165661811828613, "epoch": 3.27, "step": 294999 }, { "epoch": 3.27, "learning_rate": 4.186244897959184e-05, "loss": 2.2172, "step": 295000 }, { "FLOPS loss": 0.053313981741666794, "L0_d": 840.7, "MLM loss": 2.2312092781066895, "epoch": 3.28, "step": 295499 }, { "epoch": 3.28, "learning_rate": 4.1760408163265305e-05, "loss": 2.2157, "step": 295500 }, { "FLOPS loss": 0.05642090365290642, "L0_d": 926.34, "MLM loss": 2.2046632766723633, "epoch": 3.28, "step": 295999 }, { "epoch": 3.28, "learning_rate": 4.165836734693878e-05, "loss": 2.2109, "step": 296000 }, { "FLOPS loss": 0.059490542858839035, "L0_d": 995.2, "MLM loss": 2.146906852722168, "epoch": 3.29, "step": 296499 }, { "epoch": 3.29, "learning_rate": 4.155632653061225e-05, "loss": 2.2097, "step": 296500 }, { "FLOPS loss": 0.051311276853084564, "L0_d": 885.23, "MLM loss": 2.1590046882629395, "epoch": 3.3, "step": 296999 }, { "epoch": 3.3, "learning_rate": 4.145428571428572e-05, "loss": 2.2148, "step": 297000 }, { "FLOPS loss": 0.062071483582258224, "L0_d": 1094.39, "MLM loss": 2.164262294769287, "epoch": 3.3, "step": 297499 }, { "epoch": 3.3, "learning_rate": 4.135244897959184e-05, "loss": 2.2138, "step": 297500 }, { "FLOPS loss": 0.05503448471426964, "L0_d": 836.16, "MLM loss": 2.300021171569824, "epoch": 3.31, "step": 297999 }, { "epoch": 3.31, "learning_rate": 4.1250408163265305e-05, "loss": 2.2131, "step": 298000 }, { "FLOPS loss": 0.05203080177307129, "L0_d": 896.89, "MLM loss": 2.1577372550964355, "epoch": 3.31, "step": 298499 }, { "epoch": 3.31, "learning_rate": 4.1148367346938776e-05, "loss": 2.2071, "step": 298500 }, { "FLOPS loss": 0.04980502650141716, "L0_d": 897.47, "MLM loss": 2.222208023071289, "epoch": 3.32, "step": 298999 }, { "epoch": 3.32, "learning_rate": 4.104632653061225e-05, "loss": 2.209, "step": 299000 }, { "FLOPS loss": 0.054337188601493835, "L0_d": 899.19, "MLM loss": 2.0981152057647705, "epoch": 3.32, "step": 299499 }, { "epoch": 3.32, "learning_rate": 4.094448979591837e-05, "loss": 2.2099, "step": 299500 }, { "FLOPS loss": 0.05866047367453575, "L0_d": 968.66, "MLM loss": 2.436058521270752, "epoch": 3.33, "step": 299999 }, { "epoch": 3.33, "learning_rate": 4.084244897959184e-05, "loss": 2.2116, "step": 300000 }, { "FLOPS loss": 0.04113021865487099, "L0_d": 1044.72, "MLM loss": 2.2099387645721436, "epoch": 3.33, "step": 300499 }, { "epoch": 3.33, "learning_rate": 4.074040816326531e-05, "loss": 2.2137, "step": 300500 }, { "FLOPS loss": 0.042872294783592224, "L0_d": 815.72, "MLM loss": 2.2005367279052734, "epoch": 3.34, "step": 300999 }, { "epoch": 3.34, "learning_rate": 4.0638367346938776e-05, "loss": 2.2087, "step": 301000 }, { "FLOPS loss": 0.0595853328704834, "L0_d": 923.06, "MLM loss": 2.019927978515625, "epoch": 3.35, "step": 301499 }, { "epoch": 3.35, "learning_rate": 4.05365306122449e-05, "loss": 2.2088, "step": 301500 }, { "FLOPS loss": 0.04233074560761452, "L0_d": 751.55, "MLM loss": 2.09970760345459, "epoch": 3.35, "step": 301999 }, { "epoch": 3.35, "learning_rate": 4.043448979591837e-05, "loss": 2.2094, "step": 302000 }, { "FLOPS loss": 0.05173299461603165, "L0_d": 1075.12, "MLM loss": 2.0777177810668945, "epoch": 3.36, "step": 302499 }, { "epoch": 3.36, "learning_rate": 4.033244897959184e-05, "loss": 2.2093, "step": 302500 }, { "FLOPS loss": 0.05053543299436569, "L0_d": 806.16, "MLM loss": 2.035891532897949, "epoch": 3.36, "step": 302999 }, { "epoch": 3.36, "learning_rate": 4.023040816326531e-05, "loss": 2.2149, "step": 303000 }, { "FLOPS loss": 0.05274407938122749, "L0_d": 943.48, "MLM loss": 2.0349557399749756, "epoch": 3.37, "step": 303499 }, { "epoch": 3.37, "learning_rate": 4.012857142857143e-05, "loss": 2.2078, "step": 303500 }, { "FLOPS loss": 0.05681909620761871, "L0_d": 915.23, "MLM loss": 2.1544435024261475, "epoch": 3.37, "step": 303999 }, { "epoch": 3.37, "learning_rate": 4.0026530612244904e-05, "loss": 2.2063, "step": 304000 }, { "FLOPS loss": 0.06260503828525543, "L0_d": 1155.8, "MLM loss": 2.047473192214966, "epoch": 3.38, "step": 304499 }, { "epoch": 3.38, "learning_rate": 3.992448979591837e-05, "loss": 2.2074, "step": 304500 }, { "FLOPS loss": 0.047228068113327026, "L0_d": 728.88, "MLM loss": 2.172936201095581, "epoch": 3.38, "step": 304999 }, { "epoch": 3.38, "learning_rate": 3.982244897959184e-05, "loss": 2.2038, "step": 305000 }, { "FLOPS loss": 0.05499499291181564, "L0_d": 907.2, "MLM loss": 2.1383681297302246, "epoch": 3.39, "step": 305499 }, { "epoch": 3.39, "learning_rate": 3.972040816326531e-05, "loss": 2.2029, "step": 305500 }, { "FLOPS loss": 0.05297322943806648, "L0_d": 962.38, "MLM loss": 2.282512664794922, "epoch": 3.4, "step": 305999 }, { "epoch": 3.4, "learning_rate": 3.961857142857143e-05, "loss": 2.2089, "step": 306000 }, { "FLOPS loss": 0.04335491359233856, "L0_d": 659.44, "MLM loss": 2.2140116691589355, "epoch": 3.4, "step": 306499 }, { "epoch": 3.4, "learning_rate": 3.95165306122449e-05, "loss": 2.2051, "step": 306500 }, { "FLOPS loss": 0.05476229265332222, "L0_d": 791.34, "MLM loss": 2.1232738494873047, "epoch": 3.41, "step": 306999 }, { "epoch": 3.41, "learning_rate": 3.941448979591837e-05, "loss": 2.2028, "step": 307000 }, { "FLOPS loss": 0.06577987968921661, "L0_d": 1070.66, "MLM loss": 2.0538783073425293, "epoch": 3.41, "step": 307499 }, { "epoch": 3.41, "learning_rate": 3.931244897959184e-05, "loss": 2.2051, "step": 307500 }, { "FLOPS loss": 0.04530545696616173, "L0_d": 745.27, "MLM loss": 2.2376835346221924, "epoch": 3.42, "step": 307999 }, { "epoch": 3.42, "learning_rate": 3.921061224489796e-05, "loss": 2.2035, "step": 308000 }, { "FLOPS loss": 0.04193582385778427, "L0_d": 810.23, "MLM loss": 2.014937400817871, "epoch": 3.42, "step": 308499 }, { "epoch": 3.42, "learning_rate": 3.9108571428571425e-05, "loss": 2.2012, "step": 308500 }, { "FLOPS loss": 0.05774283409118652, "L0_d": 919.94, "MLM loss": 2.243237257003784, "epoch": 3.43, "step": 308999 }, { "epoch": 3.43, "learning_rate": 3.90065306122449e-05, "loss": 2.2086, "step": 309000 }, { "FLOPS loss": 0.05811821296811104, "L0_d": 804.31, "MLM loss": 2.055720567703247, "epoch": 3.43, "step": 309499 }, { "epoch": 3.43, "learning_rate": 3.890448979591837e-05, "loss": 2.2018, "step": 309500 }, { "FLOPS loss": 0.038207173347473145, "L0_d": 657.02, "MLM loss": 2.262584924697876, "epoch": 3.44, "step": 309999 }, { "epoch": 3.44, "learning_rate": 3.8802653061224495e-05, "loss": 2.2093, "step": 310000 }, { "FLOPS loss": 0.047180648893117905, "L0_d": 937.14, "MLM loss": 2.1344897747039795, "epoch": 3.45, "step": 310499 }, { "epoch": 3.45, "learning_rate": 3.870061224489796e-05, "loss": 2.2015, "step": 310500 }, { "FLOPS loss": 0.05457613244652748, "L0_d": 852.89, "MLM loss": 2.2299156188964844, "epoch": 3.45, "step": 310999 }, { "epoch": 3.45, "learning_rate": 3.859857142857143e-05, "loss": 2.202, "step": 311000 }, { "FLOPS loss": 0.04625779762864113, "L0_d": 692.23, "MLM loss": 2.1875410079956055, "epoch": 3.46, "step": 311499 }, { "epoch": 3.46, "learning_rate": 3.84965306122449e-05, "loss": 2.2007, "step": 311500 }, { "FLOPS loss": 0.03990597277879715, "L0_d": 745.91, "MLM loss": 2.1840474605560303, "epoch": 3.46, "step": 311999 }, { "epoch": 3.46, "learning_rate": 3.8394693877551024e-05, "loss": 2.1991, "step": 312000 }, { "FLOPS loss": 0.04576093330979347, "L0_d": 926.02, "MLM loss": 2.3445239067077637, "epoch": 3.47, "step": 312499 }, { "epoch": 3.47, "learning_rate": 3.8292653061224495e-05, "loss": 2.2006, "step": 312500 }, { "FLOPS loss": 0.057950280606746674, "L0_d": 992.88, "MLM loss": 2.311159372329712, "epoch": 3.47, "step": 312999 }, { "epoch": 3.47, "learning_rate": 3.819061224489796e-05, "loss": 2.2021, "step": 313000 }, { "FLOPS loss": 0.057960983365774155, "L0_d": 878.03, "MLM loss": 2.103071689605713, "epoch": 3.48, "step": 313499 }, { "epoch": 3.48, "learning_rate": 3.808857142857143e-05, "loss": 2.2051, "step": 313500 }, { "FLOPS loss": 0.053515635430812836, "L0_d": 886.53, "MLM loss": 2.109595775604248, "epoch": 3.48, "step": 313999 }, { "epoch": 3.48, "learning_rate": 3.798673469387755e-05, "loss": 2.1997, "step": 314000 }, { "FLOPS loss": 0.0709148570895195, "L0_d": 1088.56, "MLM loss": 2.139554262161255, "epoch": 3.49, "step": 314499 }, { "epoch": 3.49, "learning_rate": 3.788469387755102e-05, "loss": 2.1966, "step": 314500 }, { "FLOPS loss": 0.054781876504421234, "L0_d": 867.12, "MLM loss": 2.2548537254333496, "epoch": 3.5, "step": 314999 }, { "epoch": 3.5, "learning_rate": 3.7782653061224495e-05, "loss": 2.2013, "step": 315000 }, { "FLOPS loss": 0.06492281705141068, "L0_d": 888.44, "MLM loss": 2.111560344696045, "epoch": 3.5, "step": 315499 }, { "epoch": 3.5, "learning_rate": 3.768061224489796e-05, "loss": 2.1996, "step": 315500 }, { "FLOPS loss": 0.05108269676566124, "L0_d": 704.98, "MLM loss": 2.195706605911255, "epoch": 3.51, "step": 315999 }, { "epoch": 3.51, "learning_rate": 3.757877551020409e-05, "loss": 2.2021, "step": 316000 }, { "FLOPS loss": 0.04205957427620888, "L0_d": 750.72, "MLM loss": 2.176068067550659, "epoch": 3.51, "step": 316499 }, { "epoch": 3.51, "learning_rate": 3.747693877551021e-05, "loss": 2.1982, "step": 316500 }, { "FLOPS loss": 0.05266688019037247, "L0_d": 875.41, "MLM loss": 1.9634517431259155, "epoch": 3.52, "step": 316999 }, { "epoch": 3.52, "learning_rate": 3.737489795918367e-05, "loss": 2.1989, "step": 317000 }, { "FLOPS loss": 0.05023524537682533, "L0_d": 1007.98, "MLM loss": 2.173258066177368, "epoch": 3.52, "step": 317499 }, { "epoch": 3.52, "learning_rate": 3.7272857142857144e-05, "loss": 2.1996, "step": 317500 }, { "FLOPS loss": 0.04701191186904907, "L0_d": 759.47, "MLM loss": 2.2634153366088867, "epoch": 3.53, "step": 317999 }, { "epoch": 3.53, "learning_rate": 3.7170816326530615e-05, "loss": 2.1962, "step": 318000 }, { "FLOPS loss": 0.04945531487464905, "L0_d": 933.91, "MLM loss": 2.234160900115967, "epoch": 3.53, "step": 318499 }, { "epoch": 3.53, "learning_rate": 3.706877551020409e-05, "loss": 2.2016, "step": 318500 }, { "FLOPS loss": 0.0607781708240509, "L0_d": 1116.78, "MLM loss": 2.1579678058624268, "epoch": 3.54, "step": 318999 }, { "epoch": 3.54, "learning_rate": 3.696673469387755e-05, "loss": 2.1972, "step": 319000 }, { "FLOPS loss": 0.04679537191987038, "L0_d": 809.64, "MLM loss": 2.115091323852539, "epoch": 3.55, "step": 319499 }, { "epoch": 3.55, "learning_rate": 3.686469387755102e-05, "loss": 2.1947, "step": 319500 }, { "FLOPS loss": 0.05474882200360298, "L0_d": 835.75, "MLM loss": 2.3359487056732178, "epoch": 3.55, "step": 319999 }, { "epoch": 3.55, "learning_rate": 3.676265306122449e-05, "loss": 2.1996, "step": 320000 }, { "FLOPS loss": 0.060804739594459534, "L0_d": 994.77, "MLM loss": 2.015943765640259, "epoch": 3.56, "step": 320499 }, { "epoch": 3.56, "learning_rate": 3.666081632653061e-05, "loss": 2.1949, "step": 320500 }, { "FLOPS loss": 0.050524868071079254, "L0_d": 743.78, "MLM loss": 2.1780242919921875, "epoch": 3.56, "step": 320999 }, { "epoch": 3.56, "learning_rate": 3.655877551020409e-05, "loss": 2.1974, "step": 321000 }, { "FLOPS loss": 0.050447240471839905, "L0_d": 825.23, "MLM loss": 2.085819721221924, "epoch": 3.57, "step": 321499 }, { "epoch": 3.57, "learning_rate": 3.645673469387755e-05, "loss": 2.2014, "step": 321500 }, { "FLOPS loss": 0.041406333446502686, "L0_d": 757.69, "MLM loss": 2.3312458992004395, "epoch": 3.57, "step": 321999 }, { "epoch": 3.57, "learning_rate": 3.635469387755102e-05, "loss": 2.1951, "step": 322000 }, { "FLOPS loss": 0.061615899205207825, "L0_d": 1283.58, "MLM loss": 2.0845348834991455, "epoch": 3.58, "step": 322499 }, { "epoch": 3.58, "learning_rate": 3.6252857142857144e-05, "loss": 2.1958, "step": 322500 }, { "FLOPS loss": 0.05783267691731453, "L0_d": 890.2, "MLM loss": 1.970130205154419, "epoch": 3.58, "step": 322999 }, { "epoch": 3.58, "learning_rate": 3.6151020408163264e-05, "loss": 2.1966, "step": 323000 }, { "FLOPS loss": 0.052105650305747986, "L0_d": 952.23, "MLM loss": 2.2042036056518555, "epoch": 3.59, "step": 323499 }, { "epoch": 3.59, "learning_rate": 3.6048979591836736e-05, "loss": 2.1937, "step": 323500 }, { "FLOPS loss": 0.05369475111365318, "L0_d": 986.03, "MLM loss": 2.3424527645111084, "epoch": 3.6, "step": 323999 }, { "epoch": 3.6, "learning_rate": 3.594693877551021e-05, "loss": 2.1977, "step": 324000 }, { "FLOPS loss": 0.06047675758600235, "L0_d": 985.45, "MLM loss": 2.2566022872924805, "epoch": 3.6, "step": 324499 }, { "epoch": 3.6, "learning_rate": 3.584489795918368e-05, "loss": 2.1937, "step": 324500 }, { "FLOPS loss": 0.05474085733294487, "L0_d": 958.11, "MLM loss": 2.118408679962158, "epoch": 3.61, "step": 324999 }, { "epoch": 3.61, "learning_rate": 3.574285714285714e-05, "loss": 2.1999, "step": 325000 }, { "FLOPS loss": 0.05176917463541031, "L0_d": 870.19, "MLM loss": 2.072502374649048, "epoch": 3.61, "step": 325499 }, { "epoch": 3.61, "learning_rate": 3.5640816326530615e-05, "loss": 2.1943, "step": 325500 }, { "FLOPS loss": 0.055069420486688614, "L0_d": 798.97, "MLM loss": 2.062601327896118, "epoch": 3.62, "step": 325999 }, { "epoch": 3.62, "learning_rate": 3.553877551020408e-05, "loss": 2.1938, "step": 326000 }, { "FLOPS loss": 0.05477731674909592, "L0_d": 870.95, "MLM loss": 2.130213499069214, "epoch": 3.62, "step": 326499 }, { "epoch": 3.62, "learning_rate": 3.543673469387756e-05, "loss": 2.1925, "step": 326500 }, { "FLOPS loss": 0.04554872214794159, "L0_d": 776.23, "MLM loss": 2.0929856300354004, "epoch": 3.63, "step": 326999 }, { "epoch": 3.63, "learning_rate": 3.533489795918368e-05, "loss": 2.1943, "step": 327000 }, { "FLOPS loss": 0.0540357269346714, "L0_d": 850.91, "MLM loss": 2.24971342086792, "epoch": 3.63, "step": 327499 }, { "epoch": 3.63, "learning_rate": 3.523285714285714e-05, "loss": 2.188, "step": 327500 }, { "FLOPS loss": 0.07205560803413391, "L0_d": 997.16, "MLM loss": 2.185397148132324, "epoch": 3.64, "step": 327999 }, { "epoch": 3.64, "learning_rate": 3.5130816326530615e-05, "loss": 2.1909, "step": 328000 }, { "FLOPS loss": 0.05981717258691788, "L0_d": 941.84, "MLM loss": 2.0708162784576416, "epoch": 3.65, "step": 328499 }, { "epoch": 3.65, "learning_rate": 3.502877551020408e-05, "loss": 2.1925, "step": 328500 }, { "FLOPS loss": 0.05793330445885658, "L0_d": 871.94, "MLM loss": 2.1125102043151855, "epoch": 3.65, "step": 328999 }, { "epoch": 3.65, "learning_rate": 3.492693877551021e-05, "loss": 2.1927, "step": 329000 }, { "FLOPS loss": 0.05422879382967949, "L0_d": 968.86, "MLM loss": 2.1457128524780273, "epoch": 3.66, "step": 329499 }, { "epoch": 3.66, "learning_rate": 3.482489795918368e-05, "loss": 2.1912, "step": 329500 }, { "FLOPS loss": 0.049111463129520416, "L0_d": 684.58, "MLM loss": 2.127558708190918, "epoch": 3.66, "step": 329999 }, { "epoch": 3.66, "learning_rate": 3.472285714285714e-05, "loss": 2.1868, "step": 330000 }, { "FLOPS loss": 0.05449121445417404, "L0_d": 903.81, "MLM loss": 2.3272576332092285, "epoch": 3.67, "step": 330499 }, { "epoch": 3.67, "learning_rate": 3.4620816326530614e-05, "loss": 2.1887, "step": 330500 }, { "FLOPS loss": 0.05086396634578705, "L0_d": 803.14, "MLM loss": 2.0519890785217285, "epoch": 3.67, "step": 330999 }, { "epoch": 3.67, "learning_rate": 3.451877551020408e-05, "loss": 2.1913, "step": 331000 }, { "FLOPS loss": 0.06123311445116997, "L0_d": 1143.92, "MLM loss": 2.1163411140441895, "epoch": 3.68, "step": 331499 }, { "epoch": 3.68, "learning_rate": 3.4416938775510207e-05, "loss": 2.1876, "step": 331500 }, { "FLOPS loss": 0.05095098912715912, "L0_d": 973.16, "MLM loss": 2.173110008239746, "epoch": 3.68, "step": 331999 }, { "epoch": 3.68, "learning_rate": 3.431489795918367e-05, "loss": 2.1839, "step": 332000 }, { "FLOPS loss": 0.045458439737558365, "L0_d": 716.03, "MLM loss": 2.1254405975341797, "epoch": 3.69, "step": 332499 }, { "epoch": 3.69, "learning_rate": 3.421285714285715e-05, "loss": 2.1905, "step": 332500 }, { "FLOPS loss": 0.06918340921401978, "L0_d": 1090.33, "MLM loss": 1.9944758415222168, "epoch": 3.7, "step": 332999 }, { "epoch": 3.7, "learning_rate": 3.4110816326530614e-05, "loss": 2.1878, "step": 333000 }, { "FLOPS loss": 0.048804864287376404, "L0_d": 833.94, "MLM loss": 2.1066293716430664, "epoch": 3.7, "step": 333499 }, { "epoch": 3.7, "learning_rate": 3.4008979591836735e-05, "loss": 2.1912, "step": 333500 }, { "FLOPS loss": 0.052378445863723755, "L0_d": 874.52, "MLM loss": 2.2261409759521484, "epoch": 3.71, "step": 333999 }, { "epoch": 3.71, "learning_rate": 3.3906938775510206e-05, "loss": 2.186, "step": 334000 }, { "FLOPS loss": 0.04796016216278076, "L0_d": 930.25, "MLM loss": 2.1158313751220703, "epoch": 3.71, "step": 334499 }, { "epoch": 3.71, "learning_rate": 3.380489795918367e-05, "loss": 2.1914, "step": 334500 }, { "FLOPS loss": 0.044372353702783585, "L0_d": 851.02, "MLM loss": 2.211679458618164, "epoch": 3.72, "step": 334999 }, { "epoch": 3.72, "learning_rate": 3.370285714285715e-05, "loss": 2.1888, "step": 335000 }, { "FLOPS loss": 0.04598099738359451, "L0_d": 929.05, "MLM loss": 2.070303440093994, "epoch": 3.72, "step": 335499 }, { "epoch": 3.72, "learning_rate": 3.360102040816327e-05, "loss": 2.1878, "step": 335500 }, { "FLOPS loss": 0.062344592064619064, "L0_d": 1101.39, "MLM loss": 2.1628119945526123, "epoch": 3.73, "step": 335999 }, { "epoch": 3.73, "learning_rate": 3.3498979591836735e-05, "loss": 2.1885, "step": 336000 }, { "FLOPS loss": 0.04801291599869728, "L0_d": 841.2, "MLM loss": 2.3216090202331543, "epoch": 3.73, "step": 336499 }, { "epoch": 3.73, "learning_rate": 3.3396938775510206e-05, "loss": 2.1891, "step": 336500 }, { "FLOPS loss": 0.06707286089658737, "L0_d": 898.95, "MLM loss": 2.1534814834594727, "epoch": 3.74, "step": 336999 }, { "epoch": 3.74, "learning_rate": 3.329489795918367e-05, "loss": 2.1795, "step": 337000 }, { "FLOPS loss": 0.05150389298796654, "L0_d": 955.3, "MLM loss": 2.1554229259490967, "epoch": 3.75, "step": 337499 }, { "epoch": 3.75, "learning_rate": 3.319285714285714e-05, "loss": 2.1875, "step": 337500 }, { "FLOPS loss": 0.06221503019332886, "L0_d": 922.75, "MLM loss": 2.015026569366455, "epoch": 3.75, "step": 337999 }, { "epoch": 3.75, "learning_rate": 3.309102040816326e-05, "loss": 2.1851, "step": 338000 }, { "FLOPS loss": 0.04698051139712334, "L0_d": 818.77, "MLM loss": 2.1614816188812256, "epoch": 3.76, "step": 338499 }, { "epoch": 3.76, "learning_rate": 3.298897959183674e-05, "loss": 2.1844, "step": 338500 }, { "FLOPS loss": 0.05993029475212097, "L0_d": 1221.02, "MLM loss": 2.2374682426452637, "epoch": 3.76, "step": 338999 }, { "epoch": 3.76, "learning_rate": 3.2886938775510206e-05, "loss": 2.1891, "step": 339000 }, { "FLOPS loss": 0.059028953313827515, "L0_d": 867.31, "MLM loss": 2.1483635902404785, "epoch": 3.77, "step": 339499 }, { "epoch": 3.77, "learning_rate": 3.278489795918368e-05, "loss": 2.1837, "step": 339500 }, { "FLOPS loss": 0.04527248814702034, "L0_d": 1009.0, "MLM loss": 1.9461712837219238, "epoch": 3.77, "step": 339999 }, { "epoch": 3.77, "learning_rate": 3.26830612244898e-05, "loss": 2.1858, "step": 340000 }, { "FLOPS loss": 0.05926721170544624, "L0_d": 1004.55, "MLM loss": 2.0865094661712646, "epoch": 3.78, "step": 340499 }, { "epoch": 3.78, "learning_rate": 3.258102040816326e-05, "loss": 2.1878, "step": 340500 }, { "FLOPS loss": 0.06988593935966492, "L0_d": 1040.44, "MLM loss": 2.140446901321411, "epoch": 3.78, "step": 340999 }, { "epoch": 3.78, "learning_rate": 3.247897959183674e-05, "loss": 2.1865, "step": 341000 }, { "FLOPS loss": 0.052927710115909576, "L0_d": 864.53, "MLM loss": 2.0716216564178467, "epoch": 3.79, "step": 341499 }, { "epoch": 3.79, "learning_rate": 3.2376938775510206e-05, "loss": 2.1843, "step": 341500 }, { "FLOPS loss": 0.06451366096735, "L0_d": 1010.72, "MLM loss": 2.246471405029297, "epoch": 3.8, "step": 341999 }, { "epoch": 3.8, "learning_rate": 3.2275102040816326e-05, "loss": 2.1844, "step": 342000 }, { "FLOPS loss": 0.06212235987186432, "L0_d": 907.92, "MLM loss": 2.0768909454345703, "epoch": 3.8, "step": 342499 }, { "epoch": 3.8, "learning_rate": 3.21730612244898e-05, "loss": 2.183, "step": 342500 }, { "FLOPS loss": 0.058473605662584305, "L0_d": 987.47, "MLM loss": 2.2050387859344482, "epoch": 3.81, "step": 342999 }, { "epoch": 3.81, "learning_rate": 3.207102040816326e-05, "loss": 2.1786, "step": 343000 }, { "FLOPS loss": 0.055733438581228256, "L0_d": 943.78, "MLM loss": 2.175384283065796, "epoch": 3.81, "step": 343499 }, { "epoch": 3.81, "learning_rate": 3.1968979591836734e-05, "loss": 2.1811, "step": 343500 }, { "FLOPS loss": 0.0499834381043911, "L0_d": 870.72, "MLM loss": 2.2518651485443115, "epoch": 3.82, "step": 343999 }, { "epoch": 3.82, "learning_rate": 3.1866938775510206e-05, "loss": 2.186, "step": 344000 }, { "FLOPS loss": 0.046837545931339264, "L0_d": 786.98, "MLM loss": 2.0223822593688965, "epoch": 3.82, "step": 344499 }, { "epoch": 3.82, "learning_rate": 3.176510204081633e-05, "loss": 2.1835, "step": 344500 }, { "FLOPS loss": 0.060403671115636826, "L0_d": 1085.55, "MLM loss": 2.087925910949707, "epoch": 3.83, "step": 344999 }, { "epoch": 3.83, "learning_rate": 3.16630612244898e-05, "loss": 2.1837, "step": 345000 }, { "FLOPS loss": 0.06658036261796951, "L0_d": 992.7, "MLM loss": 2.1821467876434326, "epoch": 3.83, "step": 345499 }, { "epoch": 3.83, "learning_rate": 3.156102040816327e-05, "loss": 2.1803, "step": 345500 }, { "FLOPS loss": 0.04759270325303078, "L0_d": 750.38, "MLM loss": 2.184410333633423, "epoch": 3.84, "step": 345999 }, { "epoch": 3.84, "learning_rate": 3.1458979591836734e-05, "loss": 2.1841, "step": 346000 }, { "FLOPS loss": 0.041197799146175385, "L0_d": 889.83, "MLM loss": 2.058509349822998, "epoch": 3.85, "step": 346499 }, { "epoch": 3.85, "learning_rate": 3.1357142857142855e-05, "loss": 2.18, "step": 346500 }, { "FLOPS loss": 0.04344596341252327, "L0_d": 786.92, "MLM loss": 2.049893856048584, "epoch": 3.85, "step": 346999 }, { "epoch": 3.85, "learning_rate": 3.1255102040816326e-05, "loss": 2.1807, "step": 347000 }, { "FLOPS loss": 0.05742944777011871, "L0_d": 889.94, "MLM loss": 2.061300754547119, "epoch": 3.86, "step": 347499 }, { "epoch": 3.86, "learning_rate": 3.11530612244898e-05, "loss": 2.1818, "step": 347500 }, { "FLOPS loss": 0.05734623968601227, "L0_d": 947.06, "MLM loss": 2.2282285690307617, "epoch": 3.86, "step": 347999 }, { "epoch": 3.86, "learning_rate": 3.105102040816327e-05, "loss": 2.177, "step": 348000 }, { "FLOPS loss": 0.0605790801346302, "L0_d": 872.59, "MLM loss": 2.027951955795288, "epoch": 3.87, "step": 348499 }, { "epoch": 3.87, "learning_rate": 3.0948979591836734e-05, "loss": 2.1829, "step": 348500 }, { "FLOPS loss": 0.06605564057826996, "L0_d": 1122.91, "MLM loss": 2.1934096813201904, "epoch": 3.87, "step": 348999 }, { "epoch": 3.87, "learning_rate": 3.084714285714286e-05, "loss": 2.1874, "step": 349000 }, { "FLOPS loss": 0.04472722113132477, "L0_d": 838.17, "MLM loss": 2.1076998710632324, "epoch": 3.88, "step": 349499 }, { "epoch": 3.88, "learning_rate": 3.0745102040816326e-05, "loss": 2.178, "step": 349500 }, { "FLOPS loss": 0.06424874067306519, "L0_d": 1012.67, "MLM loss": 2.0305850505828857, "epoch": 3.88, "step": 349999 }, { "epoch": 3.88, "learning_rate": 3.06430612244898e-05, "loss": 2.1821, "step": 350000 }, { "FLOPS loss": 0.047599222511053085, "L0_d": 845.36, "MLM loss": 2.1980504989624023, "epoch": 3.89, "step": 350499 }, { "epoch": 3.89, "learning_rate": 3.054102040816327e-05, "loss": 2.1768, "step": 350500 }, { "FLOPS loss": 0.05459141731262207, "L0_d": 815.31, "MLM loss": 2.1290721893310547, "epoch": 3.9, "step": 350999 }, { "epoch": 3.9, "learning_rate": 3.043918367346939e-05, "loss": 2.1751, "step": 351000 }, { "FLOPS loss": 0.051371000707149506, "L0_d": 945.81, "MLM loss": 2.071333885192871, "epoch": 3.9, "step": 351499 }, { "epoch": 3.9, "learning_rate": 3.0337142857142858e-05, "loss": 2.1799, "step": 351500 }, { "FLOPS loss": 0.038782838732004166, "L0_d": 681.86, "MLM loss": 2.0437278747558594, "epoch": 3.91, "step": 351999 }, { "epoch": 3.91, "learning_rate": 3.0235102040816326e-05, "loss": 2.1777, "step": 352000 }, { "FLOPS loss": 0.06115822494029999, "L0_d": 942.25, "MLM loss": 2.0766518115997314, "epoch": 3.91, "step": 352499 }, { "epoch": 3.91, "learning_rate": 3.0133061224489794e-05, "loss": 2.1759, "step": 352500 }, { "FLOPS loss": 0.05320432037115097, "L0_d": 850.69, "MLM loss": 2.1417808532714844, "epoch": 3.92, "step": 352999 }, { "epoch": 3.92, "learning_rate": 3.0031224489795918e-05, "loss": 2.1755, "step": 353000 }, { "FLOPS loss": 0.061143022030591965, "L0_d": 1056.31, "MLM loss": 2.226233720779419, "epoch": 3.92, "step": 353499 }, { "epoch": 3.92, "learning_rate": 2.992918367346939e-05, "loss": 2.1755, "step": 353500 }, { "FLOPS loss": 0.05656610056757927, "L0_d": 827.31, "MLM loss": 2.221358299255371, "epoch": 3.93, "step": 353999 }, { "epoch": 3.93, "learning_rate": 2.9827142857142857e-05, "loss": 2.1742, "step": 354000 }, { "FLOPS loss": 0.04866122454404831, "L0_d": 854.88, "MLM loss": 1.9438591003417969, "epoch": 3.93, "step": 354499 }, { "epoch": 3.93, "learning_rate": 2.972510204081633e-05, "loss": 2.1764, "step": 354500 }, { "FLOPS loss": 0.06544341146945953, "L0_d": 1079.08, "MLM loss": 2.112839937210083, "epoch": 3.94, "step": 354999 }, { "epoch": 3.94, "learning_rate": 2.962326530612245e-05, "loss": 2.1795, "step": 355000 }, { "FLOPS loss": 0.06201547756791115, "L0_d": 936.14, "MLM loss": 2.167311668395996, "epoch": 3.95, "step": 355499 }, { "epoch": 3.95, "learning_rate": 2.9521224489795918e-05, "loss": 2.1735, "step": 355500 }, { "FLOPS loss": 0.051303308457136154, "L0_d": 795.86, "MLM loss": 2.197533130645752, "epoch": 3.95, "step": 355999 }, { "epoch": 3.95, "learning_rate": 2.9419183673469392e-05, "loss": 2.1739, "step": 356000 }, { "FLOPS loss": 0.0616656057536602, "L0_d": 1064.95, "MLM loss": 2.179872512817383, "epoch": 3.96, "step": 356499 }, { "epoch": 3.96, "learning_rate": 2.931714285714286e-05, "loss": 2.1797, "step": 356500 }, { "FLOPS loss": 0.045415300875902176, "L0_d": 884.09, "MLM loss": 2.3073251247406006, "epoch": 3.96, "step": 356999 }, { "epoch": 3.96, "learning_rate": 2.921530612244898e-05, "loss": 2.1762, "step": 357000 }, { "FLOPS loss": 0.06393018364906311, "L0_d": 1106.06, "MLM loss": 2.003974437713623, "epoch": 3.97, "step": 357499 }, { "epoch": 3.97, "learning_rate": 2.911326530612245e-05, "loss": 2.1694, "step": 357500 }, { "FLOPS loss": 0.05075768381357193, "L0_d": 940.47, "MLM loss": 2.3486485481262207, "epoch": 3.97, "step": 357999 }, { "epoch": 3.97, "learning_rate": 2.9011224489795917e-05, "loss": 2.1673, "step": 358000 }, { "FLOPS loss": 0.057089854031801224, "L0_d": 985.64, "MLM loss": 2.1064419746398926, "epoch": 3.98, "step": 358499 }, { "epoch": 3.98, "learning_rate": 2.8909183673469386e-05, "loss": 2.1707, "step": 358500 }, { "FLOPS loss": 0.05326242372393608, "L0_d": 928.56, "MLM loss": 2.0739479064941406, "epoch": 3.98, "step": 358999 }, { "epoch": 3.98, "learning_rate": 2.880714285714286e-05, "loss": 2.173, "step": 359000 }, { "FLOPS loss": 0.04545772448182106, "L0_d": 869.53, "MLM loss": 2.0205798149108887, "epoch": 3.99, "step": 359499 }, { "epoch": 3.99, "learning_rate": 2.8705306122448984e-05, "loss": 2.1721, "step": 359500 }, { "FLOPS loss": 0.05554395541548729, "L0_d": 1092.84, "MLM loss": 2.124103546142578, "epoch": 4.0, "step": 359999 }, { "epoch": 4.0, "learning_rate": 2.8603265306122453e-05, "loss": 2.1727, "step": 360000 }, { "FLOPS loss": 0.06262467801570892, "L0_d": 943.38, "MLM loss": 2.1242740154266357, "epoch": 4.0, "step": 360499 }, { "epoch": 4.0, "learning_rate": 2.850122448979592e-05, "loss": 2.1712, "step": 360500 }, { "FLOPS loss": 0.05900612473487854, "L0_d": 843.02, "MLM loss": 2.230020046234131, "epoch": 4.01, "step": 360999 }, { "epoch": 4.01, "learning_rate": 2.839918367346939e-05, "loss": 2.1692, "step": 361000 }, { "FLOPS loss": 0.04395557567477226, "L0_d": 804.78, "MLM loss": 2.010903835296631, "epoch": 4.01, "step": 361499 }, { "epoch": 4.01, "learning_rate": 2.829734693877551e-05, "loss": 2.1715, "step": 361500 }, { "FLOPS loss": 0.051260050386190414, "L0_d": 875.17, "MLM loss": 2.001664638519287, "epoch": 4.02, "step": 361999 }, { "epoch": 4.02, "learning_rate": 2.8195306122448984e-05, "loss": 2.1702, "step": 362000 }, { "FLOPS loss": 0.06399668753147125, "L0_d": 1129.27, "MLM loss": 2.1489925384521484, "epoch": 4.02, "step": 362499 }, { "epoch": 4.02, "learning_rate": 2.8093265306122452e-05, "loss": 2.1714, "step": 362500 }, { "FLOPS loss": 0.05227614566683769, "L0_d": 946.19, "MLM loss": 2.1537365913391113, "epoch": 4.03, "step": 362999 }, { "epoch": 4.03, "learning_rate": 2.799122448979592e-05, "loss": 2.1698, "step": 363000 }, { "FLOPS loss": 0.03925257921218872, "L0_d": 690.3, "MLM loss": 2.3284449577331543, "epoch": 4.03, "step": 363499 }, { "epoch": 4.03, "learning_rate": 2.788918367346939e-05, "loss": 2.1655, "step": 363500 }, { "FLOPS loss": 0.049571748822927475, "L0_d": 851.61, "MLM loss": 1.9855554103851318, "epoch": 4.04, "step": 363999 }, { "epoch": 4.04, "learning_rate": 2.778734693877551e-05, "loss": 2.1713, "step": 364000 }, { "FLOPS loss": 0.05379843711853027, "L0_d": 1056.48, "MLM loss": 2.0610368251800537, "epoch": 4.05, "step": 364499 }, { "epoch": 4.05, "learning_rate": 2.7685306122448977e-05, "loss": 2.1655, "step": 364500 }, { "FLOPS loss": 0.04581060633063316, "L0_d": 672.34, "MLM loss": 2.216171979904175, "epoch": 4.05, "step": 364999 }, { "epoch": 4.05, "learning_rate": 2.7583265306122452e-05, "loss": 2.1702, "step": 365000 }, { "FLOPS loss": 0.05021185055375099, "L0_d": 776.2, "MLM loss": 2.0434820652008057, "epoch": 4.06, "step": 365499 }, { "epoch": 4.06, "learning_rate": 2.748122448979592e-05, "loss": 2.167, "step": 365500 }, { "FLOPS loss": 0.07118741422891617, "L0_d": 1148.89, "MLM loss": 2.0388269424438477, "epoch": 4.06, "step": 365999 }, { "epoch": 4.06, "learning_rate": 2.7379387755102044e-05, "loss": 2.1674, "step": 366000 }, { "FLOPS loss": 0.06101416423916817, "L0_d": 934.8, "MLM loss": 2.142348289489746, "epoch": 4.07, "step": 366499 }, { "epoch": 4.07, "learning_rate": 2.7277346938775512e-05, "loss": 2.1717, "step": 366500 }, { "FLOPS loss": 0.05547558516263962, "L0_d": 821.56, "MLM loss": 2.1603124141693115, "epoch": 4.07, "step": 366999 }, { "epoch": 4.07, "learning_rate": 2.717530612244898e-05, "loss": 2.1676, "step": 367000 }, { "FLOPS loss": 0.051259007304906845, "L0_d": 842.31, "MLM loss": 1.9229415655136108, "epoch": 4.08, "step": 367499 }, { "epoch": 4.08, "learning_rate": 2.7073265306122452e-05, "loss": 2.1677, "step": 367500 }, { "FLOPS loss": 0.060752104967832565, "L0_d": 1049.27, "MLM loss": 2.21079158782959, "epoch": 4.08, "step": 367999 }, { "epoch": 4.08, "learning_rate": 2.6971428571428576e-05, "loss": 2.17, "step": 368000 }, { "FLOPS loss": 0.055672332644462585, "L0_d": 867.62, "MLM loss": 1.9942853450775146, "epoch": 4.09, "step": 368499 }, { "epoch": 4.09, "learning_rate": 2.6869387755102044e-05, "loss": 2.164, "step": 368500 }, { "FLOPS loss": 0.06367730349302292, "L0_d": 1018.25, "MLM loss": 2.164059638977051, "epoch": 4.1, "step": 368999 }, { "epoch": 4.1, "learning_rate": 2.6767346938775512e-05, "loss": 2.165, "step": 369000 }, { "FLOPS loss": 0.044330112636089325, "L0_d": 700.44, "MLM loss": 2.0051159858703613, "epoch": 4.1, "step": 369499 }, { "epoch": 4.1, "learning_rate": 2.666530612244898e-05, "loss": 2.1662, "step": 369500 }, { "FLOPS loss": 0.049619223922491074, "L0_d": 835.39, "MLM loss": 2.1635875701904297, "epoch": 4.11, "step": 369999 }, { "epoch": 4.11, "learning_rate": 2.656326530612245e-05, "loss": 2.1674, "step": 370000 }, { "FLOPS loss": 0.06112358346581459, "L0_d": 924.84, "MLM loss": 1.9283922910690308, "epoch": 4.11, "step": 370499 }, { "epoch": 4.11, "learning_rate": 2.646142857142857e-05, "loss": 2.1643, "step": 370500 }, { "FLOPS loss": 0.052606042474508286, "L0_d": 862.92, "MLM loss": 2.0105972290039062, "epoch": 4.12, "step": 370999 }, { "epoch": 4.12, "learning_rate": 2.6359387755102044e-05, "loss": 2.1659, "step": 371000 }, { "FLOPS loss": 0.04724416881799698, "L0_d": 863.03, "MLM loss": 2.0921969413757324, "epoch": 4.12, "step": 371499 }, { "epoch": 4.12, "learning_rate": 2.6257346938775512e-05, "loss": 2.1638, "step": 371500 }, { "FLOPS loss": 0.047573573887348175, "L0_d": 825.17, "MLM loss": 2.2761223316192627, "epoch": 4.13, "step": 371999 }, { "epoch": 4.13, "learning_rate": 2.615530612244898e-05, "loss": 2.1609, "step": 372000 }, { "FLOPS loss": 0.05940329283475876, "L0_d": 985.84, "MLM loss": 2.022876262664795, "epoch": 4.13, "step": 372499 }, { "epoch": 4.13, "learning_rate": 2.6053469387755104e-05, "loss": 2.1647, "step": 372500 }, { "FLOPS loss": 0.04888923093676567, "L0_d": 887.06, "MLM loss": 2.171447515487671, "epoch": 4.14, "step": 372999 }, { "epoch": 4.14, "learning_rate": 2.5951428571428572e-05, "loss": 2.1617, "step": 373000 }, { "FLOPS loss": 0.05977650731801987, "L0_d": 1006.73, "MLM loss": 2.043025493621826, "epoch": 4.15, "step": 373499 }, { "epoch": 4.15, "learning_rate": 2.5849387755102044e-05, "loss": 2.1654, "step": 373500 }, { "FLOPS loss": 0.06310431659221649, "L0_d": 1241.34, "MLM loss": 2.1716365814208984, "epoch": 4.15, "step": 373999 }, { "epoch": 4.15, "learning_rate": 2.5747346938775512e-05, "loss": 2.1656, "step": 374000 }, { "FLOPS loss": 0.059984106570482254, "L0_d": 909.22, "MLM loss": 2.169152021408081, "epoch": 4.16, "step": 374499 }, { "epoch": 4.16, "learning_rate": 2.5645510204081636e-05, "loss": 2.1659, "step": 374500 }, { "FLOPS loss": 0.05839752033352852, "L0_d": 993.83, "MLM loss": 2.003178119659424, "epoch": 4.16, "step": 374999 }, { "epoch": 4.16, "learning_rate": 2.5543469387755104e-05, "loss": 2.1632, "step": 375000 }, { "FLOPS loss": 0.0569823794066906, "L0_d": 899.14, "MLM loss": 2.1752469539642334, "epoch": 4.17, "step": 375499 }, { "epoch": 4.17, "learning_rate": 2.5441428571428572e-05, "loss": 2.1595, "step": 375500 }, { "FLOPS loss": 0.04950562119483948, "L0_d": 900.66, "MLM loss": 1.9586538076400757, "epoch": 4.17, "step": 375999 }, { "epoch": 4.17, "learning_rate": 2.533938775510204e-05, "loss": 2.1624, "step": 376000 }, { "FLOPS loss": 0.06744813919067383, "L0_d": 1030.14, "MLM loss": 2.184413194656372, "epoch": 4.18, "step": 376499 }, { "epoch": 4.18, "learning_rate": 2.5237346938775515e-05, "loss": 2.1644, "step": 376500 }, { "FLOPS loss": 0.04473467916250229, "L0_d": 862.69, "MLM loss": 2.1169168949127197, "epoch": 4.18, "step": 376999 }, { "epoch": 4.18, "learning_rate": 2.5135510204081636e-05, "loss": 2.165, "step": 377000 }, { "FLOPS loss": 0.04983612522482872, "L0_d": 848.08, "MLM loss": 2.1897661685943604, "epoch": 4.19, "step": 377499 }, { "epoch": 4.19, "learning_rate": 2.5033469387755104e-05, "loss": 2.1635, "step": 377500 }, { "FLOPS loss": 0.04548395797610283, "L0_d": 848.41, "MLM loss": 2.0270395278930664, "epoch": 4.19, "step": 377999 }, { "epoch": 4.19, "learning_rate": 2.4931428571428572e-05, "loss": 2.1634, "step": 378000 }, { "FLOPS loss": 0.05541262775659561, "L0_d": 868.72, "MLM loss": 2.27474045753479, "epoch": 4.2, "step": 378499 }, { "epoch": 4.2, "learning_rate": 2.4829387755102043e-05, "loss": 2.1635, "step": 378500 }, { "FLOPS loss": 0.0489383228123188, "L0_d": 778.47, "MLM loss": 2.115077018737793, "epoch": 4.21, "step": 378999 }, { "epoch": 4.21, "learning_rate": 2.4727551020408164e-05, "loss": 2.1639, "step": 379000 }, { "FLOPS loss": 0.0530565045773983, "L0_d": 863.83, "MLM loss": 1.9883641004562378, "epoch": 4.21, "step": 379499 }, { "epoch": 4.21, "learning_rate": 2.4625510204081635e-05, "loss": 2.1605, "step": 379500 }, { "FLOPS loss": 0.05226287618279457, "L0_d": 862.03, "MLM loss": 1.8738837242126465, "epoch": 4.22, "step": 379999 }, { "epoch": 4.22, "learning_rate": 2.4523469387755104e-05, "loss": 2.1608, "step": 380000 }, { "FLOPS loss": 0.061256762593984604, "L0_d": 907.98, "MLM loss": 2.204589366912842, "epoch": 4.22, "step": 380499 }, { "epoch": 4.22, "learning_rate": 2.442142857142857e-05, "loss": 2.16, "step": 380500 }, { "FLOPS loss": 0.0489281490445137, "L0_d": 768.95, "MLM loss": 2.0815811157226562, "epoch": 4.23, "step": 380999 }, { "epoch": 4.23, "learning_rate": 2.4319591836734696e-05, "loss": 2.1586, "step": 381000 }, { "FLOPS loss": 0.05553525686264038, "L0_d": 801.36, "MLM loss": 2.1102046966552734, "epoch": 4.23, "step": 381499 }, { "epoch": 4.23, "learning_rate": 2.4217551020408164e-05, "loss": 2.162, "step": 381500 }, { "FLOPS loss": 0.06935592740774155, "L0_d": 1254.78, "MLM loss": 2.175985336303711, "epoch": 4.24, "step": 381999 }, { "epoch": 4.24, "learning_rate": 2.4115510204081635e-05, "loss": 2.1599, "step": 382000 }, { "FLOPS loss": 0.053525280207395554, "L0_d": 839.86, "MLM loss": 2.2302515506744385, "epoch": 4.24, "step": 382499 }, { "epoch": 4.24, "learning_rate": 2.4013469387755103e-05, "loss": 2.1593, "step": 382500 }, { "FLOPS loss": 0.058649156242609024, "L0_d": 910.48, "MLM loss": 1.9512641429901123, "epoch": 4.25, "step": 382999 }, { "epoch": 4.25, "learning_rate": 2.3911632653061224e-05, "loss": 2.1586, "step": 383000 }, { "FLOPS loss": 0.05519488453865051, "L0_d": 923.22, "MLM loss": 2.107400417327881, "epoch": 4.26, "step": 383499 }, { "epoch": 4.26, "learning_rate": 2.3809591836734696e-05, "loss": 2.1609, "step": 383500 }, { "FLOPS loss": 0.05247490853071213, "L0_d": 822.2, "MLM loss": 2.09049654006958, "epoch": 4.26, "step": 383999 }, { "epoch": 4.26, "learning_rate": 2.3707551020408164e-05, "loss": 2.1618, "step": 384000 }, { "FLOPS loss": 0.058925412595272064, "L0_d": 975.97, "MLM loss": 2.165003538131714, "epoch": 4.27, "step": 384499 }, { "epoch": 4.27, "learning_rate": 2.3605510204081635e-05, "loss": 2.1558, "step": 384500 }, { "FLOPS loss": 0.05863140895962715, "L0_d": 1055.45, "MLM loss": 2.122023105621338, "epoch": 4.27, "step": 384999 }, { "epoch": 4.27, "learning_rate": 2.350367346938776e-05, "loss": 2.1591, "step": 385000 }, { "FLOPS loss": 0.06239921599626541, "L0_d": 1168.05, "MLM loss": 2.1006088256835938, "epoch": 4.28, "step": 385499 }, { "epoch": 4.28, "learning_rate": 2.3401632653061227e-05, "loss": 2.1592, "step": 385500 }, { "FLOPS loss": 0.06413101404905319, "L0_d": 905.56, "MLM loss": 2.3184633255004883, "epoch": 4.28, "step": 385999 }, { "epoch": 4.28, "learning_rate": 2.3299591836734695e-05, "loss": 2.1582, "step": 386000 }, { "FLOPS loss": 0.06148085743188858, "L0_d": 1015.36, "MLM loss": 2.184425115585327, "epoch": 4.29, "step": 386499 }, { "epoch": 4.29, "learning_rate": 2.3197551020408163e-05, "loss": 2.1593, "step": 386500 }, { "FLOPS loss": 0.05914951488375664, "L0_d": 947.56, "MLM loss": 2.01639461517334, "epoch": 4.29, "step": 386999 }, { "epoch": 4.29, "learning_rate": 2.3095714285714287e-05, "loss": 2.1601, "step": 387000 }, { "FLOPS loss": 0.05045102536678314, "L0_d": 785.75, "MLM loss": 2.084231376647949, "epoch": 4.3, "step": 387499 }, { "epoch": 4.3, "learning_rate": 2.2993673469387756e-05, "loss": 2.1554, "step": 387500 }, { "FLOPS loss": 0.07288298010826111, "L0_d": 1233.33, "MLM loss": 2.093576192855835, "epoch": 4.31, "step": 387999 }, { "epoch": 4.31, "learning_rate": 2.2891632653061227e-05, "loss": 2.1615, "step": 388000 }, { "FLOPS loss": 0.06319364160299301, "L0_d": 1019.59, "MLM loss": 2.149829626083374, "epoch": 4.31, "step": 388499 }, { "epoch": 4.31, "learning_rate": 2.2789591836734695e-05, "loss": 2.155, "step": 388500 }, { "FLOPS loss": 0.06716024875640869, "L0_d": 1190.72, "MLM loss": 2.3623664379119873, "epoch": 4.32, "step": 388999 }, { "epoch": 4.32, "learning_rate": 2.2687551020408167e-05, "loss": 2.1597, "step": 389000 }, { "FLOPS loss": 0.04663018882274628, "L0_d": 790.16, "MLM loss": 2.1722326278686523, "epoch": 4.32, "step": 389499 }, { "epoch": 4.32, "learning_rate": 2.2585714285714287e-05, "loss": 2.1556, "step": 389500 }, { "FLOPS loss": 0.05925951898097992, "L0_d": 1008.88, "MLM loss": 1.928437352180481, "epoch": 4.33, "step": 389999 }, { "epoch": 4.33, "learning_rate": 2.2483673469387755e-05, "loss": 2.1583, "step": 390000 }, { "FLOPS loss": 0.0513288788497448, "L0_d": 839.44, "MLM loss": 1.9191234111785889, "epoch": 4.33, "step": 390499 }, { "epoch": 4.33, "learning_rate": 2.2381632653061227e-05, "loss": 2.1549, "step": 390500 }, { "FLOPS loss": 0.06269978731870651, "L0_d": 984.92, "MLM loss": 2.1687002182006836, "epoch": 4.34, "step": 390999 }, { "epoch": 4.34, "learning_rate": 2.2279591836734695e-05, "loss": 2.1563, "step": 391000 }, { "FLOPS loss": 0.044437021017074585, "L0_d": 850.3, "MLM loss": 2.2203116416931152, "epoch": 4.34, "step": 391499 }, { "epoch": 4.34, "learning_rate": 2.217775510204082e-05, "loss": 2.1586, "step": 391500 }, { "FLOPS loss": 0.06293778866529465, "L0_d": 842.56, "MLM loss": 2.1339268684387207, "epoch": 4.35, "step": 391999 }, { "epoch": 4.35, "learning_rate": 2.2075714285714287e-05, "loss": 2.1574, "step": 392000 }, { "FLOPS loss": 0.05700749531388283, "L0_d": 862.44, "MLM loss": 2.16910719871521, "epoch": 4.36, "step": 392499 }, { "epoch": 4.36, "learning_rate": 2.1973673469387755e-05, "loss": 2.1542, "step": 392500 }, { "FLOPS loss": 0.058901745826005936, "L0_d": 1115.69, "MLM loss": 2.187575578689575, "epoch": 4.36, "step": 392999 }, { "epoch": 4.36, "learning_rate": 2.1871632653061223e-05, "loss": 2.1519, "step": 393000 }, { "FLOPS loss": 0.045208640396595, "L0_d": 652.38, "MLM loss": 2.3575387001037598, "epoch": 4.37, "step": 393499 }, { "epoch": 4.37, "learning_rate": 2.1769795918367347e-05, "loss": 2.1542, "step": 393500 }, { "FLOPS loss": 0.052039358764886856, "L0_d": 910.8, "MLM loss": 2.1134583950042725, "epoch": 4.37, "step": 393999 }, { "epoch": 4.37, "learning_rate": 2.166775510204082e-05, "loss": 2.1555, "step": 394000 }, { "FLOPS loss": 0.050663623958826065, "L0_d": 848.61, "MLM loss": 2.2021045684814453, "epoch": 4.38, "step": 394499 }, { "epoch": 4.38, "learning_rate": 2.1565714285714287e-05, "loss": 2.155, "step": 394500 }, { "FLOPS loss": 0.06039031967520714, "L0_d": 1012.92, "MLM loss": 2.1872599124908447, "epoch": 4.38, "step": 394999 }, { "epoch": 4.38, "learning_rate": 2.146367346938776e-05, "loss": 2.1523, "step": 395000 }, { "FLOPS loss": 0.050335533916950226, "L0_d": 898.25, "MLM loss": 2.09260630607605, "epoch": 4.39, "step": 395499 }, { "epoch": 4.39, "learning_rate": 2.136183673469388e-05, "loss": 2.1534, "step": 395500 }, { "FLOPS loss": 0.04941366985440254, "L0_d": 881.89, "MLM loss": 2.0738818645477295, "epoch": 4.39, "step": 395999 }, { "epoch": 4.39, "learning_rate": 2.1259795918367347e-05, "loss": 2.1548, "step": 396000 }, { "FLOPS loss": 0.04744439572095871, "L0_d": 830.31, "MLM loss": 2.045245885848999, "epoch": 4.4, "step": 396499 }, { "epoch": 4.4, "learning_rate": 2.1157755102040815e-05, "loss": 2.159, "step": 396500 }, { "FLOPS loss": 0.050974879413843155, "L0_d": 813.83, "MLM loss": 2.0699522495269775, "epoch": 4.41, "step": 396999 }, { "epoch": 4.41, "learning_rate": 2.1055714285714287e-05, "loss": 2.157, "step": 397000 }, { "FLOPS loss": 0.0584995336830616, "L0_d": 936.03, "MLM loss": 2.112994432449341, "epoch": 4.41, "step": 397499 }, { "epoch": 4.41, "learning_rate": 2.0953673469387755e-05, "loss": 2.1516, "step": 397500 }, { "FLOPS loss": 0.04688579589128494, "L0_d": 827.33, "MLM loss": 2.1029469966888428, "epoch": 4.42, "step": 397999 }, { "epoch": 4.42, "learning_rate": 2.085183673469388e-05, "loss": 2.1533, "step": 398000 }, { "FLOPS loss": 0.0415002666413784, "L0_d": 780.86, "MLM loss": 2.2324435710906982, "epoch": 4.42, "step": 398499 }, { "epoch": 4.42, "learning_rate": 2.0749795918367347e-05, "loss": 2.153, "step": 398500 }, { "FLOPS loss": 0.04630446434020996, "L0_d": 860.14, "MLM loss": 1.9942233562469482, "epoch": 4.43, "step": 398999 }, { "epoch": 4.43, "learning_rate": 2.0647755102040815e-05, "loss": 2.1514, "step": 399000 }, { "FLOPS loss": 0.04342009127140045, "L0_d": 806.17, "MLM loss": 2.2144784927368164, "epoch": 4.43, "step": 399499 }, { "epoch": 4.43, "learning_rate": 2.0545714285714286e-05, "loss": 2.1503, "step": 399500 }, { "FLOPS loss": 0.05658222362399101, "L0_d": 840.0, "MLM loss": 1.9057402610778809, "epoch": 4.44, "step": 399999 }, { "epoch": 4.44, "learning_rate": 2.044387755102041e-05, "loss": 2.1536, "step": 400000 }, { "FLOPS loss": 0.05019119381904602, "L0_d": 839.64, "MLM loss": 1.9638289213180542, "epoch": 4.44, "step": 400499 }, { "epoch": 4.44, "learning_rate": 2.034183673469388e-05, "loss": 2.1579, "step": 400500 }, { "FLOPS loss": 0.0584777407348156, "L0_d": 905.55, "MLM loss": 2.0449748039245605, "epoch": 4.45, "step": 400999 }, { "epoch": 4.45, "learning_rate": 2.024e-05, "loss": 2.1457, "step": 401000 }, { "FLOPS loss": 0.04894617944955826, "L0_d": 804.42, "MLM loss": 1.9672155380249023, "epoch": 4.46, "step": 401499 }, { "epoch": 4.46, "learning_rate": 2.013795918367347e-05, "loss": 2.1526, "step": 401500 }, { "FLOPS loss": 0.04332287609577179, "L0_d": 917.98, "MLM loss": 2.271369218826294, "epoch": 4.46, "step": 401999 }, { "epoch": 4.46, "learning_rate": 2.003591836734694e-05, "loss": 2.1493, "step": 402000 }, { "FLOPS loss": 0.05052706599235535, "L0_d": 789.55, "MLM loss": 2.2546157836914062, "epoch": 4.47, "step": 402499 }, { "epoch": 4.47, "learning_rate": 1.9933877551020407e-05, "loss": 2.1504, "step": 402500 }, { "FLOPS loss": 0.05615134909749031, "L0_d": 893.88, "MLM loss": 2.0987918376922607, "epoch": 4.47, "step": 402999 }, { "epoch": 4.47, "learning_rate": 1.983183673469388e-05, "loss": 2.1479, "step": 403000 }, { "FLOPS loss": 0.048413895070552826, "L0_d": 730.16, "MLM loss": 1.8651633262634277, "epoch": 4.48, "step": 403499 }, { "epoch": 4.48, "learning_rate": 1.9729795918367347e-05, "loss": 2.1478, "step": 403500 }, { "FLOPS loss": 0.05873379856348038, "L0_d": 1136.23, "MLM loss": 1.8329681158065796, "epoch": 4.48, "step": 403999 }, { "epoch": 4.48, "learning_rate": 1.9627755102040818e-05, "loss": 2.153, "step": 404000 }, { "FLOPS loss": 0.05278199911117554, "L0_d": 961.02, "MLM loss": 2.1568922996520996, "epoch": 4.49, "step": 404499 }, { "epoch": 4.49, "learning_rate": 1.9525714285714286e-05, "loss": 2.1519, "step": 404500 }, { "FLOPS loss": 0.05051026865839958, "L0_d": 811.0, "MLM loss": 2.233319044113159, "epoch": 4.49, "step": 404999 }, { "epoch": 4.49, "learning_rate": 1.9423673469387758e-05, "loss": 2.147, "step": 405000 }, { "FLOPS loss": 0.053434837609529495, "L0_d": 897.11, "MLM loss": 1.9460184574127197, "epoch": 4.5, "step": 405499 }, { "epoch": 4.5, "learning_rate": 1.9321632653061226e-05, "loss": 2.1505, "step": 405500 }, { "FLOPS loss": 0.05347585305571556, "L0_d": 800.86, "MLM loss": 2.0889968872070312, "epoch": 4.51, "step": 405999 }, { "epoch": 4.51, "learning_rate": 1.9219795918367346e-05, "loss": 2.1509, "step": 406000 }, { "FLOPS loss": 0.050644200295209885, "L0_d": 820.31, "MLM loss": 2.0721323490142822, "epoch": 4.51, "step": 406499 }, { "epoch": 4.51, "learning_rate": 1.9117755102040818e-05, "loss": 2.1477, "step": 406500 }, { "FLOPS loss": 0.06063523888587952, "L0_d": 1009.72, "MLM loss": 1.9239542484283447, "epoch": 4.52, "step": 406999 }, { "epoch": 4.52, "learning_rate": 1.9015714285714286e-05, "loss": 2.1516, "step": 407000 }, { "FLOPS loss": 0.0654524490237236, "L0_d": 1082.38, "MLM loss": 2.043360471725464, "epoch": 4.52, "step": 407499 }, { "epoch": 4.52, "learning_rate": 1.8913673469387754e-05, "loss": 2.1491, "step": 407500 }, { "FLOPS loss": 0.054562315344810486, "L0_d": 854.02, "MLM loss": 1.9695563316345215, "epoch": 4.53, "step": 407999 }, { "epoch": 4.53, "learning_rate": 1.8811836734693878e-05, "loss": 2.147, "step": 408000 }, { "FLOPS loss": 0.035512614995241165, "L0_d": 632.73, "MLM loss": 2.0668578147888184, "epoch": 4.53, "step": 408499 }, { "epoch": 4.53, "learning_rate": 1.871e-05, "loss": 2.146, "step": 408500 }, { "FLOPS loss": 0.044345319271087646, "L0_d": 819.17, "MLM loss": 2.005946159362793, "epoch": 4.54, "step": 408999 }, { "epoch": 4.54, "learning_rate": 1.860795918367347e-05, "loss": 2.1498, "step": 409000 }, { "FLOPS loss": 0.04264531657099724, "L0_d": 766.22, "MLM loss": 2.2345073223114014, "epoch": 4.54, "step": 409499 }, { "epoch": 4.54, "learning_rate": 1.8505918367346938e-05, "loss": 2.1483, "step": 409500 }, { "FLOPS loss": 0.04678963124752045, "L0_d": 1029.28, "MLM loss": 2.1507232189178467, "epoch": 4.55, "step": 409999 }, { "epoch": 4.55, "learning_rate": 1.840387755102041e-05, "loss": 2.1474, "step": 410000 }, { "FLOPS loss": 0.048637717962265015, "L0_d": 742.8, "MLM loss": 2.1675162315368652, "epoch": 4.56, "step": 410499 }, { "epoch": 4.56, "learning_rate": 1.8301836734693878e-05, "loss": 2.1477, "step": 410500 }, { "FLOPS loss": 0.048585373908281326, "L0_d": 844.22, "MLM loss": 2.0447845458984375, "epoch": 4.56, "step": 410999 }, { "epoch": 4.56, "learning_rate": 1.819979591836735e-05, "loss": 2.1439, "step": 411000 }, { "FLOPS loss": 0.0657045915722847, "L0_d": 1241.75, "MLM loss": 2.156813383102417, "epoch": 4.57, "step": 411499 }, { "epoch": 4.57, "learning_rate": 1.8097755102040817e-05, "loss": 2.1465, "step": 411500 }, { "FLOPS loss": 0.062336936593055725, "L0_d": 904.86, "MLM loss": 1.986930251121521, "epoch": 4.57, "step": 411999 }, { "epoch": 4.57, "learning_rate": 1.7995714285714285e-05, "loss": 2.1435, "step": 412000 }, { "FLOPS loss": 0.053948063403367996, "L0_d": 786.78, "MLM loss": 2.23659348487854, "epoch": 4.58, "step": 412499 }, { "epoch": 4.58, "learning_rate": 1.7893673469387757e-05, "loss": 2.1481, "step": 412500 }, { "FLOPS loss": 0.0543290376663208, "L0_d": 881.95, "MLM loss": 2.1556718349456787, "epoch": 4.58, "step": 412999 }, { "epoch": 4.58, "learning_rate": 1.7791836734693878e-05, "loss": 2.1455, "step": 413000 }, { "FLOPS loss": 0.045479871332645416, "L0_d": 814.44, "MLM loss": 2.218897819519043, "epoch": 4.59, "step": 413499 }, { "epoch": 4.59, "learning_rate": 1.7689795918367346e-05, "loss": 2.1457, "step": 413500 }, { "FLOPS loss": 0.05531291291117668, "L0_d": 882.67, "MLM loss": 2.07147216796875, "epoch": 4.59, "step": 413999 }, { "epoch": 4.59, "learning_rate": 1.7587755102040817e-05, "loss": 2.1459, "step": 414000 }, { "FLOPS loss": 0.057411227375268936, "L0_d": 949.89, "MLM loss": 2.235172748565674, "epoch": 4.6, "step": 414499 }, { "epoch": 4.6, "learning_rate": 1.7485714285714285e-05, "loss": 2.1475, "step": 414500 }, { "FLOPS loss": 0.05269256606698036, "L0_d": 824.98, "MLM loss": 2.0441198348999023, "epoch": 4.61, "step": 414999 }, { "epoch": 4.61, "learning_rate": 1.738387755102041e-05, "loss": 2.146, "step": 415000 }, { "FLOPS loss": 0.05347437039017677, "L0_d": 920.3, "MLM loss": 2.033576726913452, "epoch": 4.61, "step": 415499 }, { "epoch": 4.61, "learning_rate": 1.728183673469388e-05, "loss": 2.1438, "step": 415500 }, { "FLOPS loss": 0.06344080716371536, "L0_d": 965.66, "MLM loss": 2.138929605484009, "epoch": 4.62, "step": 415999 }, { "epoch": 4.62, "learning_rate": 1.717979591836735e-05, "loss": 2.1446, "step": 416000 }, { "FLOPS loss": 0.047302767634391785, "L0_d": 716.83, "MLM loss": 2.0310006141662598, "epoch": 4.62, "step": 416499 }, { "epoch": 4.62, "learning_rate": 1.7077755102040817e-05, "loss": 2.1381, "step": 416500 }, { "FLOPS loss": 0.055353209376335144, "L0_d": 932.81, "MLM loss": 2.0060908794403076, "epoch": 4.63, "step": 416999 }, { "epoch": 4.63, "learning_rate": 1.697591836734694e-05, "loss": 2.1471, "step": 417000 }, { "FLOPS loss": 0.04700247198343277, "L0_d": 870.61, "MLM loss": 2.19582462310791, "epoch": 4.63, "step": 417499 }, { "epoch": 4.63, "learning_rate": 1.687387755102041e-05, "loss": 2.1448, "step": 417500 }, { "FLOPS loss": 0.03677580878138542, "L0_d": 678.52, "MLM loss": 1.8639564514160156, "epoch": 4.64, "step": 417999 }, { "epoch": 4.64, "learning_rate": 1.6771836734693877e-05, "loss": 2.1448, "step": 418000 }, { "FLOPS loss": 0.06459075957536697, "L0_d": 1085.22, "MLM loss": 2.169715404510498, "epoch": 4.64, "step": 418499 }, { "epoch": 4.64, "learning_rate": 1.666979591836735e-05, "loss": 2.1505, "step": 418500 }, { "FLOPS loss": 0.04992241784930229, "L0_d": 813.09, "MLM loss": 2.17281174659729, "epoch": 4.65, "step": 418999 }, { "epoch": 4.65, "learning_rate": 1.656795918367347e-05, "loss": 2.1465, "step": 419000 }, { "FLOPS loss": 0.058006759732961655, "L0_d": 907.12, "MLM loss": 2.049400806427002, "epoch": 4.66, "step": 419499 }, { "epoch": 4.66, "learning_rate": 1.6465918367346937e-05, "loss": 2.1451, "step": 419500 }, { "FLOPS loss": 0.0593842938542366, "L0_d": 965.75, "MLM loss": 1.9616055488586426, "epoch": 4.66, "step": 419999 }, { "epoch": 4.66, "learning_rate": 1.636387755102041e-05, "loss": 2.1398, "step": 420000 }, { "FLOPS loss": 0.05656881630420685, "L0_d": 832.55, "MLM loss": 1.9994637966156006, "epoch": 4.67, "step": 420499 }, { "epoch": 4.67, "learning_rate": 1.6261836734693877e-05, "loss": 2.1383, "step": 420500 }, { "FLOPS loss": 0.04367349296808243, "L0_d": 825.02, "MLM loss": 2.0119988918304443, "epoch": 4.67, "step": 420999 }, { "epoch": 4.67, "learning_rate": 1.615979591836735e-05, "loss": 2.1404, "step": 421000 }, { "FLOPS loss": 0.04331597313284874, "L0_d": 794.58, "MLM loss": 2.2233223915100098, "epoch": 4.68, "step": 421499 }, { "epoch": 4.68, "learning_rate": 1.6057959183673473e-05, "loss": 2.1398, "step": 421500 }, { "FLOPS loss": 0.04418673366308212, "L0_d": 731.09, "MLM loss": 2.0380990505218506, "epoch": 4.68, "step": 421999 }, { "epoch": 4.68, "learning_rate": 1.595591836734694e-05, "loss": 2.136, "step": 422000 }, { "FLOPS loss": 0.05184420198202133, "L0_d": 1025.86, "MLM loss": 1.980980396270752, "epoch": 4.69, "step": 422499 }, { "epoch": 4.69, "learning_rate": 1.585387755102041e-05, "loss": 2.1389, "step": 422500 }, { "FLOPS loss": 0.05402204021811485, "L0_d": 843.53, "MLM loss": 2.2986907958984375, "epoch": 4.69, "step": 422999 }, { "epoch": 4.69, "learning_rate": 1.5751836734693877e-05, "loss": 2.1418, "step": 423000 }, { "FLOPS loss": 0.052429232746362686, "L0_d": 803.08, "MLM loss": 1.992973804473877, "epoch": 4.7, "step": 423499 }, { "epoch": 4.7, "learning_rate": 1.565e-05, "loss": 2.1426, "step": 423500 }, { "FLOPS loss": 0.05779305100440979, "L0_d": 914.12, "MLM loss": 2.0805771350860596, "epoch": 4.71, "step": 423999 }, { "epoch": 4.71, "learning_rate": 1.554795918367347e-05, "loss": 2.1412, "step": 424000 }, { "FLOPS loss": 0.04339585453271866, "L0_d": 806.77, "MLM loss": 2.1252779960632324, "epoch": 4.71, "step": 424499 }, { "epoch": 4.71, "learning_rate": 1.544591836734694e-05, "loss": 2.1432, "step": 424500 }, { "FLOPS loss": 0.05198323354125023, "L0_d": 1073.17, "MLM loss": 2.2074055671691895, "epoch": 4.72, "step": 424999 }, { "epoch": 4.72, "learning_rate": 1.534387755102041e-05, "loss": 2.1353, "step": 425000 }, { "FLOPS loss": 0.051012177020311356, "L0_d": 805.81, "MLM loss": 1.954539179801941, "epoch": 4.72, "step": 425499 }, { "epoch": 4.72, "learning_rate": 1.5242040816326531e-05, "loss": 2.1402, "step": 425500 }, { "FLOPS loss": 0.06652916967868805, "L0_d": 1030.38, "MLM loss": 2.0773637294769287, "epoch": 4.73, "step": 425999 }, { "epoch": 4.73, "learning_rate": 1.514e-05, "loss": 2.1395, "step": 426000 }, { "FLOPS loss": 0.05978979915380478, "L0_d": 913.59, "MLM loss": 2.158665895462036, "epoch": 4.73, "step": 426499 }, { "epoch": 4.73, "learning_rate": 1.5037959183673469e-05, "loss": 2.1409, "step": 426500 }, { "FLOPS loss": 0.05733130872249603, "L0_d": 994.62, "MLM loss": 2.2022311687469482, "epoch": 4.74, "step": 426999 }, { "epoch": 4.74, "learning_rate": 1.493591836734694e-05, "loss": 2.1373, "step": 427000 }, { "FLOPS loss": 0.048791565001010895, "L0_d": 828.66, "MLM loss": 2.0462646484375, "epoch": 4.74, "step": 427499 }, { "epoch": 4.74, "learning_rate": 1.4833877551020408e-05, "loss": 2.1357, "step": 427500 }, { "FLOPS loss": 0.04827158525586128, "L0_d": 829.69, "MLM loss": 2.130842447280884, "epoch": 4.75, "step": 427999 }, { "epoch": 4.75, "learning_rate": 1.473204081632653e-05, "loss": 2.1414, "step": 428000 }, { "FLOPS loss": 0.04919278621673584, "L0_d": 1056.19, "MLM loss": 2.139404296875, "epoch": 4.76, "step": 428499 }, { "epoch": 4.76, "learning_rate": 1.4630000000000002e-05, "loss": 2.1347, "step": 428500 }, { "FLOPS loss": 0.05565393716096878, "L0_d": 954.14, "MLM loss": 2.084869623184204, "epoch": 4.76, "step": 428999 }, { "epoch": 4.76, "learning_rate": 1.452795918367347e-05, "loss": 2.1364, "step": 429000 }, { "FLOPS loss": 0.06739050149917603, "L0_d": 1041.61, "MLM loss": 2.1569736003875732, "epoch": 4.77, "step": 429499 }, { "epoch": 4.77, "learning_rate": 1.4425918367346938e-05, "loss": 2.1391, "step": 429500 }, { "FLOPS loss": 0.05755490064620972, "L0_d": 879.2, "MLM loss": 2.143249034881592, "epoch": 4.77, "step": 429999 }, { "epoch": 4.77, "learning_rate": 1.432387755102041e-05, "loss": 2.1348, "step": 430000 }, { "FLOPS loss": 0.05807384476065636, "L0_d": 930.75, "MLM loss": 2.119503974914551, "epoch": 4.78, "step": 430499 }, { "epoch": 4.78, "learning_rate": 1.4222040816326532e-05, "loss": 2.1354, "step": 430500 }, { "FLOPS loss": 0.05283951386809349, "L0_d": 925.66, "MLM loss": 2.013265371322632, "epoch": 4.78, "step": 430999 }, { "epoch": 4.78, "learning_rate": 1.4120204081632655e-05, "loss": 2.1359, "step": 431000 }, { "FLOPS loss": 0.05283992737531662, "L0_d": 862.98, "MLM loss": 2.0538315773010254, "epoch": 4.79, "step": 431499 }, { "epoch": 4.79, "learning_rate": 1.4018163265306123e-05, "loss": 2.1364, "step": 431500 }, { "FLOPS loss": 0.05638214573264122, "L0_d": 913.14, "MLM loss": 1.8696248531341553, "epoch": 4.79, "step": 431999 }, { "epoch": 4.79, "learning_rate": 1.3916122448979592e-05, "loss": 2.1406, "step": 432000 }, { "FLOPS loss": 0.04940183460712433, "L0_d": 755.73, "MLM loss": 1.973371982574463, "epoch": 4.8, "step": 432499 }, { "epoch": 4.8, "learning_rate": 1.3814081632653062e-05, "loss": 2.1399, "step": 432500 }, { "FLOPS loss": 0.055572617799043655, "L0_d": 903.88, "MLM loss": 2.1445822715759277, "epoch": 4.81, "step": 432999 }, { "epoch": 4.81, "learning_rate": 1.3712040816326532e-05, "loss": 2.1371, "step": 433000 }, { "FLOPS loss": 0.06619881093502045, "L0_d": 1185.22, "MLM loss": 2.0282764434814453, "epoch": 4.81, "step": 433499 }, { "epoch": 4.81, "learning_rate": 1.361e-05, "loss": 2.1384, "step": 433500 }, { "FLOPS loss": 0.05328712612390518, "L0_d": 873.62, "MLM loss": 2.2030832767486572, "epoch": 4.82, "step": 433999 }, { "epoch": 4.82, "learning_rate": 1.3507959183673468e-05, "loss": 2.1343, "step": 434000 }, { "FLOPS loss": 0.04561091959476471, "L0_d": 824.95, "MLM loss": 2.061128854751587, "epoch": 4.82, "step": 434499 }, { "epoch": 4.82, "learning_rate": 1.340591836734694e-05, "loss": 2.1402, "step": 434500 }, { "FLOPS loss": 0.05250409245491028, "L0_d": 879.39, "MLM loss": 2.151285409927368, "epoch": 4.83, "step": 434999 }, { "epoch": 4.83, "learning_rate": 1.3304081632653062e-05, "loss": 2.14, "step": 435000 }, { "FLOPS loss": 0.06207619607448578, "L0_d": 916.61, "MLM loss": 2.2853026390075684, "epoch": 4.83, "step": 435499 }, { "epoch": 4.83, "learning_rate": 1.320204081632653e-05, "loss": 2.1403, "step": 435500 }, { "FLOPS loss": 0.0664757490158081, "L0_d": 945.84, "MLM loss": 2.2041711807250977, "epoch": 4.84, "step": 435999 }, { "epoch": 4.84, "learning_rate": 1.3100000000000002e-05, "loss": 2.1349, "step": 436000 }, { "FLOPS loss": 0.04947144165635109, "L0_d": 876.11, "MLM loss": 2.101712226867676, "epoch": 4.84, "step": 436499 }, { "epoch": 4.84, "learning_rate": 1.299795918367347e-05, "loss": 2.1365, "step": 436500 }, { "FLOPS loss": 0.04111479967832565, "L0_d": 688.97, "MLM loss": 1.989811658859253, "epoch": 4.85, "step": 436999 }, { "epoch": 4.85, "learning_rate": 1.289591836734694e-05, "loss": 2.133, "step": 437000 }, { "FLOPS loss": 0.05196467041969299, "L0_d": 987.73, "MLM loss": 2.1359786987304688, "epoch": 4.86, "step": 437499 }, { "epoch": 4.86, "learning_rate": 1.2794081632653062e-05, "loss": 2.1304, "step": 437500 }, { "FLOPS loss": 0.04984664544463158, "L0_d": 815.52, "MLM loss": 2.013530969619751, "epoch": 4.86, "step": 437999 }, { "epoch": 4.86, "learning_rate": 1.2692040816326532e-05, "loss": 2.1373, "step": 438000 }, { "FLOPS loss": 0.0501394160091877, "L0_d": 975.08, "MLM loss": 2.0036849975585938, "epoch": 4.87, "step": 438499 }, { "epoch": 4.87, "learning_rate": 1.2590000000000001e-05, "loss": 2.1316, "step": 438500 }, { "FLOPS loss": 0.04254571348428726, "L0_d": 692.45, "MLM loss": 2.203824043273926, "epoch": 4.87, "step": 438999 }, { "epoch": 4.87, "learning_rate": 1.248795918367347e-05, "loss": 2.1373, "step": 439000 }, { "FLOPS loss": 0.06851733475923538, "L0_d": 1001.78, "MLM loss": 2.124694585800171, "epoch": 4.88, "step": 439499 }, { "epoch": 4.88, "learning_rate": 1.2386122448979592e-05, "loss": 2.1375, "step": 439500 }, { "FLOPS loss": 0.04886775463819504, "L0_d": 794.89, "MLM loss": 2.1344408988952637, "epoch": 4.88, "step": 439999 }, { "epoch": 4.88, "learning_rate": 1.2284081632653062e-05, "loss": 2.136, "step": 440000 }, { "FLOPS loss": 0.053634896874427795, "L0_d": 982.39, "MLM loss": 2.003080368041992, "epoch": 4.89, "step": 440499 }, { "epoch": 4.89, "learning_rate": 1.2182040816326531e-05, "loss": 2.13, "step": 440500 }, { "FLOPS loss": 0.04614207521080971, "L0_d": 898.53, "MLM loss": 2.005286455154419, "epoch": 4.89, "step": 440999 }, { "epoch": 4.89, "learning_rate": 1.2080000000000001e-05, "loss": 2.1302, "step": 441000 }, { "FLOPS loss": 0.04019342362880707, "L0_d": 706.3, "MLM loss": 2.0040509700775146, "epoch": 4.9, "step": 441499 }, { "epoch": 4.9, "learning_rate": 1.1978163265306124e-05, "loss": 2.133, "step": 441500 }, { "FLOPS loss": 0.050139471888542175, "L0_d": 864.17, "MLM loss": 2.0061678886413574, "epoch": 4.91, "step": 441999 }, { "epoch": 4.91, "learning_rate": 1.1876122448979593e-05, "loss": 2.1347, "step": 442000 }, { "FLOPS loss": 0.05669067054986954, "L0_d": 914.73, "MLM loss": 2.00954008102417, "epoch": 4.91, "step": 442499 }, { "epoch": 4.91, "learning_rate": 1.1774081632653061e-05, "loss": 2.1348, "step": 442500 }, { "FLOPS loss": 0.05425754189491272, "L0_d": 871.06, "MLM loss": 1.946420669555664, "epoch": 4.92, "step": 442999 }, { "epoch": 4.92, "learning_rate": 1.1672040816326531e-05, "loss": 2.1325, "step": 443000 }, { "FLOPS loss": 0.04397822171449661, "L0_d": 710.33, "MLM loss": 2.0439536571502686, "epoch": 4.92, "step": 443499 }, { "epoch": 4.92, "learning_rate": 1.1570204081632654e-05, "loss": 2.1332, "step": 443500 }, { "FLOPS loss": 0.05384219065308571, "L0_d": 1073.23, "MLM loss": 1.973341464996338, "epoch": 4.93, "step": 443999 }, { "epoch": 4.93, "learning_rate": 1.1468163265306123e-05, "loss": 2.1372, "step": 444000 }, { "FLOPS loss": 0.04803945869207382, "L0_d": 1054.66, "MLM loss": 2.0352325439453125, "epoch": 4.93, "step": 444499 }, { "epoch": 4.93, "learning_rate": 1.1366122448979592e-05, "loss": 2.1307, "step": 444500 }, { "FLOPS loss": 0.05171111226081848, "L0_d": 1110.98, "MLM loss": 1.9978969097137451, "epoch": 4.94, "step": 444999 }, { "epoch": 4.94, "learning_rate": 1.1264081632653061e-05, "loss": 2.1303, "step": 445000 }, { "FLOPS loss": 0.043126724660396576, "L0_d": 721.97, "MLM loss": 2.0790863037109375, "epoch": 4.94, "step": 445499 }, { "epoch": 4.94, "learning_rate": 1.1162244897959184e-05, "loss": 2.1333, "step": 445500 }, { "FLOPS loss": 0.06537404656410217, "L0_d": 896.14, "MLM loss": 1.9862679243087769, "epoch": 4.95, "step": 445999 }, { "epoch": 4.95, "learning_rate": 1.1060204081632653e-05, "loss": 2.1311, "step": 446000 }, { "FLOPS loss": 0.04740593582391739, "L0_d": 772.75, "MLM loss": 2.0791940689086914, "epoch": 4.96, "step": 446499 }, { "epoch": 4.96, "learning_rate": 1.0958163265306123e-05, "loss": 2.1295, "step": 446500 }, { "FLOPS loss": 0.06982973217964172, "L0_d": 1037.58, "MLM loss": 2.172328472137451, "epoch": 4.96, "step": 446999 }, { "epoch": 4.96, "learning_rate": 1.0856122448979593e-05, "loss": 2.1337, "step": 447000 }, { "FLOPS loss": 0.050109609961509705, "L0_d": 808.73, "MLM loss": 2.2643849849700928, "epoch": 4.97, "step": 447499 }, { "epoch": 4.97, "learning_rate": 1.0754081632653061e-05, "loss": 2.133, "step": 447500 }, { "FLOPS loss": 0.05233826860785484, "L0_d": 877.59, "MLM loss": 2.065129280090332, "epoch": 4.97, "step": 447999 }, { "epoch": 4.97, "learning_rate": 1.0652244897959183e-05, "loss": 2.136, "step": 448000 }, { "FLOPS loss": 0.04769035056233406, "L0_d": 869.67, "MLM loss": 2.1656153202056885, "epoch": 4.98, "step": 448499 }, { "epoch": 4.98, "learning_rate": 1.0550204081632653e-05, "loss": 2.1326, "step": 448500 }, { "FLOPS loss": 0.05040234699845314, "L0_d": 941.58, "MLM loss": 2.1029789447784424, "epoch": 4.98, "step": 448999 }, { "epoch": 4.98, "learning_rate": 1.0448163265306123e-05, "loss": 2.1289, "step": 449000 }, { "FLOPS loss": 0.05159325897693634, "L0_d": 820.91, "MLM loss": 2.083779811859131, "epoch": 4.99, "step": 449499 }, { "epoch": 4.99, "learning_rate": 1.0346122448979593e-05, "loss": 2.13, "step": 449500 }, { "FLOPS loss": 0.05995727330446243, "L0_d": 996.25, "MLM loss": 2.0526297092437744, "epoch": 4.99, "step": 449999 }, { "epoch": 4.99, "learning_rate": 1.0244285714285715e-05, "loss": 2.1318, "step": 450000 }, { "FLOPS loss": 0.04719854146242142, "L0_d": 918.64, "MLM loss": 2.1177916526794434, "epoch": 5.0, "step": 450499 }, { "epoch": 5.0, "learning_rate": 1.0142244897959185e-05, "loss": 2.1289, "step": 450500 }, { "FLOPS loss": 0.052875321358442307, "L0_d": 1014.64, "MLM loss": 2.173793315887451, "epoch": 5.01, "step": 450999 }, { "epoch": 5.01, "learning_rate": 1.0040204081632653e-05, "loss": 2.1322, "step": 451000 }, { "FLOPS loss": 0.04699577018618584, "L0_d": 1010.91, "MLM loss": 2.0319714546203613, "epoch": 5.01, "step": 451499 }, { "epoch": 5.01, "learning_rate": 9.938163265306123e-06, "loss": 2.1264, "step": 451500 }, { "FLOPS loss": 0.04523584991693497, "L0_d": 711.03, "MLM loss": 1.8854057788848877, "epoch": 5.02, "step": 451999 }, { "epoch": 5.02, "learning_rate": 9.836326530612245e-06, "loss": 2.1269, "step": 452000 }, { "FLOPS loss": 0.06195767596364021, "L0_d": 978.62, "MLM loss": 1.9085772037506104, "epoch": 5.02, "step": 452499 }, { "epoch": 5.02, "learning_rate": 9.734285714285715e-06, "loss": 2.1263, "step": 452500 }, { "FLOPS loss": 0.06156365945935249, "L0_d": 1111.45, "MLM loss": 2.158266067504883, "epoch": 5.03, "step": 452999 }, { "epoch": 5.03, "learning_rate": 9.632244897959185e-06, "loss": 2.131, "step": 453000 }, { "FLOPS loss": 0.07194296270608902, "L0_d": 1003.98, "MLM loss": 2.00203013420105, "epoch": 5.03, "step": 453499 }, { "epoch": 5.03, "learning_rate": 9.530204081632653e-06, "loss": 2.1238, "step": 453500 }, { "FLOPS loss": 0.04217708855867386, "L0_d": 677.08, "MLM loss": 2.040041923522949, "epoch": 5.04, "step": 453999 }, { "epoch": 5.04, "learning_rate": 9.428163265306123e-06, "loss": 2.1283, "step": 454000 }, { "FLOPS loss": 0.051338110119104385, "L0_d": 913.02, "MLM loss": 2.008694887161255, "epoch": 5.04, "step": 454499 }, { "epoch": 5.04, "learning_rate": 9.326326530612245e-06, "loss": 2.1236, "step": 454500 }, { "FLOPS loss": 0.054415568709373474, "L0_d": 931.77, "MLM loss": 2.146310806274414, "epoch": 5.05, "step": 454999 }, { "epoch": 5.05, "learning_rate": 9.224285714285715e-06, "loss": 2.1302, "step": 455000 }, { "FLOPS loss": 0.0452914722263813, "L0_d": 774.72, "MLM loss": 2.070974588394165, "epoch": 5.06, "step": 455499 }, { "epoch": 5.06, "learning_rate": 9.122244897959185e-06, "loss": 2.1241, "step": 455500 }, { "FLOPS loss": 0.04789729043841362, "L0_d": 1053.67, "MLM loss": 2.138104200363159, "epoch": 5.06, "step": 455999 }, { "epoch": 5.06, "learning_rate": 9.020204081632654e-06, "loss": 2.1255, "step": 456000 }, { "FLOPS loss": 0.07352223247289658, "L0_d": 1284.73, "MLM loss": 2.0282247066497803, "epoch": 5.07, "step": 456499 }, { "epoch": 5.07, "learning_rate": 8.918163265306122e-06, "loss": 2.121, "step": 456500 }, { "FLOPS loss": 0.058553002774715424, "L0_d": 806.44, "MLM loss": 1.9811928272247314, "epoch": 5.07, "step": 456999 }, { "epoch": 5.07, "learning_rate": 8.816326530612245e-06, "loss": 2.1252, "step": 457000 }, { "FLOPS loss": 0.06054987758398056, "L0_d": 871.59, "MLM loss": 2.0565450191497803, "epoch": 5.08, "step": 457499 }, { "epoch": 5.08, "learning_rate": 8.714285714285715e-06, "loss": 2.1268, "step": 457500 }, { "FLOPS loss": 0.05363916978240013, "L0_d": 877.92, "MLM loss": 2.1367383003234863, "epoch": 5.08, "step": 457999 }, { "epoch": 5.08, "learning_rate": 8.612244897959184e-06, "loss": 2.1239, "step": 458000 }, { "FLOPS loss": 0.05745154991745949, "L0_d": 899.88, "MLM loss": 2.0971121788024902, "epoch": 5.09, "step": 458499 }, { "epoch": 5.09, "learning_rate": 8.510204081632652e-06, "loss": 2.1241, "step": 458500 }, { "FLOPS loss": 0.047635529190301895, "L0_d": 789.28, "MLM loss": 2.1187169551849365, "epoch": 5.09, "step": 458999 }, { "epoch": 5.09, "learning_rate": 8.408367346938775e-06, "loss": 2.1243, "step": 459000 }, { "FLOPS loss": 0.04745763540267944, "L0_d": 794.22, "MLM loss": 2.3092610836029053, "epoch": 5.1, "step": 459499 }, { "epoch": 5.1, "learning_rate": 8.306326530612245e-06, "loss": 2.1275, "step": 459500 }, { "FLOPS loss": 0.0527702271938324, "L0_d": 919.58, "MLM loss": 1.9943205118179321, "epoch": 5.1, "step": 459999 }, { "epoch": 5.1, "learning_rate": 8.204285714285714e-06, "loss": 2.131, "step": 460000 }, { "FLOPS loss": 0.06276457011699677, "L0_d": 995.98, "MLM loss": 2.1917290687561035, "epoch": 5.11, "step": 460499 }, { "epoch": 5.11, "learning_rate": 8.102244897959184e-06, "loss": 2.1266, "step": 460500 }, { "FLOPS loss": 0.06025362387299538, "L0_d": 911.05, "MLM loss": 2.054462194442749, "epoch": 5.12, "step": 460999 }, { "epoch": 5.12, "learning_rate": 8.000204081632654e-06, "loss": 2.1252, "step": 461000 }, { "FLOPS loss": 0.03893174231052399, "L0_d": 673.28, "MLM loss": 2.0197315216064453, "epoch": 5.12, "step": 461499 }, { "epoch": 5.12, "learning_rate": 7.898367346938776e-06, "loss": 2.1251, "step": 461500 }, { "FLOPS loss": 0.05713987350463867, "L0_d": 1017.28, "MLM loss": 1.9514565467834473, "epoch": 5.13, "step": 461999 }, { "epoch": 5.13, "learning_rate": 7.796326530612246e-06, "loss": 2.1234, "step": 462000 }, { "FLOPS loss": 0.05448077246546745, "L0_d": 880.12, "MLM loss": 2.2003912925720215, "epoch": 5.13, "step": 462499 }, { "epoch": 5.13, "learning_rate": 7.694285714285714e-06, "loss": 2.1249, "step": 462500 }, { "FLOPS loss": 0.05133300647139549, "L0_d": 832.84, "MLM loss": 2.031496524810791, "epoch": 5.14, "step": 462999 }, { "epoch": 5.14, "learning_rate": 7.592244897959185e-06, "loss": 2.1258, "step": 463000 }, { "FLOPS loss": 0.052263010293245316, "L0_d": 893.61, "MLM loss": 2.00602388381958, "epoch": 5.14, "step": 463499 }, { "epoch": 5.14, "learning_rate": 7.490204081632654e-06, "loss": 2.1295, "step": 463500 }, { "FLOPS loss": 0.05631369352340698, "L0_d": 878.25, "MLM loss": 1.997037649154663, "epoch": 5.15, "step": 463999 }, { "epoch": 5.15, "learning_rate": 7.388163265306122e-06, "loss": 2.1259, "step": 464000 }, { "FLOPS loss": 0.05386161804199219, "L0_d": 856.02, "MLM loss": 1.9867942333221436, "epoch": 5.15, "step": 464499 }, { "epoch": 5.15, "learning_rate": 7.286326530612245e-06, "loss": 2.126, "step": 464500 }, { "FLOPS loss": 0.05438683182001114, "L0_d": 968.75, "MLM loss": 2.1743063926696777, "epoch": 5.16, "step": 464999 }, { "epoch": 5.16, "learning_rate": 7.184285714285714e-06, "loss": 2.1265, "step": 465000 }, { "FLOPS loss": 0.042536135762929916, "L0_d": 736.19, "MLM loss": 1.9791052341461182, "epoch": 5.17, "step": 465499 }, { "epoch": 5.17, "learning_rate": 7.082244897959184e-06, "loss": 2.1254, "step": 465500 }, { "FLOPS loss": 0.05652670934796333, "L0_d": 928.05, "MLM loss": 1.9028654098510742, "epoch": 5.17, "step": 465999 }, { "epoch": 5.17, "learning_rate": 6.9802040816326535e-06, "loss": 2.1216, "step": 466000 }, { "FLOPS loss": 0.06446348130702972, "L0_d": 1024.98, "MLM loss": 2.1626739501953125, "epoch": 5.18, "step": 466499 }, { "epoch": 5.18, "learning_rate": 6.878367346938776e-06, "loss": 2.1224, "step": 466500 }, { "FLOPS loss": 0.05092165619134903, "L0_d": 809.06, "MLM loss": 2.084792137145996, "epoch": 5.18, "step": 466999 }, { "epoch": 5.18, "learning_rate": 6.776326530612246e-06, "loss": 2.123, "step": 467000 }, { "FLOPS loss": 0.06064632534980774, "L0_d": 932.72, "MLM loss": 1.9990763664245605, "epoch": 5.19, "step": 467499 }, { "epoch": 5.19, "learning_rate": 6.674285714285715e-06, "loss": 2.1226, "step": 467500 }, { "FLOPS loss": 0.044811710715293884, "L0_d": 761.84, "MLM loss": 2.1462574005126953, "epoch": 5.19, "step": 467999 }, { "epoch": 5.19, "learning_rate": 6.572244897959184e-06, "loss": 2.1224, "step": 468000 }, { "FLOPS loss": 0.057425305247306824, "L0_d": 903.44, "MLM loss": 2.150848150253296, "epoch": 5.2, "step": 468499 }, { "epoch": 5.2, "learning_rate": 6.470408163265307e-06, "loss": 2.124, "step": 468500 }, { "FLOPS loss": 0.04412500932812691, "L0_d": 770.81, "MLM loss": 2.040346145629883, "epoch": 5.2, "step": 468999 }, { "epoch": 5.2, "learning_rate": 6.368367346938777e-06, "loss": 2.1231, "step": 469000 }, { "FLOPS loss": 0.049536511301994324, "L0_d": 830.97, "MLM loss": 1.9905624389648438, "epoch": 5.21, "step": 469499 }, { "epoch": 5.21, "learning_rate": 6.266326530612245e-06, "loss": 2.1247, "step": 469500 }, { "FLOPS loss": 0.04591776803135872, "L0_d": 847.62, "MLM loss": 2.030029773712158, "epoch": 5.22, "step": 469999 }, { "epoch": 5.22, "learning_rate": 6.1642857142857144e-06, "loss": 2.1196, "step": 470000 }, { "FLOPS loss": 0.048578400164842606, "L0_d": 946.42, "MLM loss": 2.1271989345550537, "epoch": 5.22, "step": 470499 }, { "epoch": 5.22, "learning_rate": 6.062244897959183e-06, "loss": 2.125, "step": 470500 }, { "FLOPS loss": 0.05150909349322319, "L0_d": 828.25, "MLM loss": 2.0788793563842773, "epoch": 5.23, "step": 470999 }, { "epoch": 5.23, "learning_rate": 5.960204081632653e-06, "loss": 2.1202, "step": 471000 }, { "FLOPS loss": 0.048870816826820374, "L0_d": 721.98, "MLM loss": 2.0828723907470703, "epoch": 5.23, "step": 471499 }, { "epoch": 5.23, "learning_rate": 5.8583673469387755e-06, "loss": 2.1215, "step": 471500 }, { "FLOPS loss": 0.057634588330984116, "L0_d": 907.8, "MLM loss": 2.1410462856292725, "epoch": 5.24, "step": 471999 }, { "epoch": 5.24, "learning_rate": 5.756326530612245e-06, "loss": 2.121, "step": 472000 }, { "FLOPS loss": 0.05204245448112488, "L0_d": 988.41, "MLM loss": 2.2734556198120117, "epoch": 5.24, "step": 472499 }, { "epoch": 5.24, "learning_rate": 5.654285714285715e-06, "loss": 2.1237, "step": 472500 }, { "FLOPS loss": 0.05399225279688835, "L0_d": 839.47, "MLM loss": 2.2295637130737305, "epoch": 5.25, "step": 472999 }, { "epoch": 5.25, "learning_rate": 5.5524489795918375e-06, "loss": 2.125, "step": 473000 }, { "FLOPS loss": 0.053103238344192505, "L0_d": 867.36, "MLM loss": 2.145981788635254, "epoch": 5.25, "step": 473499 }, { "epoch": 5.25, "learning_rate": 5.450408163265306e-06, "loss": 2.1184, "step": 473500 }, { "FLOPS loss": 0.05431005731225014, "L0_d": 842.8, "MLM loss": 1.8932204246520996, "epoch": 5.26, "step": 473999 }, { "epoch": 5.26, "learning_rate": 5.348367346938775e-06, "loss": 2.1174, "step": 474000 }, { "FLOPS loss": 0.06053243204951286, "L0_d": 1039.88, "MLM loss": 2.203632354736328, "epoch": 5.27, "step": 474499 }, { "epoch": 5.27, "learning_rate": 5.246326530612245e-06, "loss": 2.12, "step": 474500 }, { "FLOPS loss": 0.0506560280919075, "L0_d": 807.41, "MLM loss": 1.9568325281143188, "epoch": 5.27, "step": 474999 }, { "epoch": 5.27, "learning_rate": 5.144285714285715e-06, "loss": 2.1247, "step": 475000 }, { "FLOPS loss": 0.047664374113082886, "L0_d": 855.44, "MLM loss": 2.073967933654785, "epoch": 5.28, "step": 475499 }, { "epoch": 5.28, "learning_rate": 5.042448979591837e-06, "loss": 2.1194, "step": 475500 }, { "FLOPS loss": 0.05150017514824867, "L0_d": 863.53, "MLM loss": 2.1492044925689697, "epoch": 5.28, "step": 475999 }, { "epoch": 5.28, "learning_rate": 4.940408163265307e-06, "loss": 2.1179, "step": 476000 }, { "FLOPS loss": 0.05474089831113815, "L0_d": 955.33, "MLM loss": 2.0979180335998535, "epoch": 5.29, "step": 476499 }, { "epoch": 5.29, "learning_rate": 4.838367346938775e-06, "loss": 2.1193, "step": 476500 }, { "FLOPS loss": 0.04201635718345642, "L0_d": 857.8, "MLM loss": 1.9226372241973877, "epoch": 5.29, "step": 476999 }, { "epoch": 5.29, "learning_rate": 4.736326530612245e-06, "loss": 2.1196, "step": 477000 }, { "FLOPS loss": 0.0573698952794075, "L0_d": 885.31, "MLM loss": 1.7628611326217651, "epoch": 5.3, "step": 477499 }, { "epoch": 5.3, "learning_rate": 4.634285714285715e-06, "loss": 2.1234, "step": 477500 }, { "FLOPS loss": 0.04896373301744461, "L0_d": 871.89, "MLM loss": 2.028053045272827, "epoch": 5.3, "step": 477999 }, { "epoch": 5.3, "learning_rate": 4.5322448979591845e-06, "loss": 2.1225, "step": 478000 }, { "FLOPS loss": 0.04549040272831917, "L0_d": 848.16, "MLM loss": 1.8938937187194824, "epoch": 5.31, "step": 478499 }, { "epoch": 5.31, "learning_rate": 4.430204081632653e-06, "loss": 2.12, "step": 478500 }, { "FLOPS loss": 0.06346151232719421, "L0_d": 899.0, "MLM loss": 2.149658679962158, "epoch": 5.32, "step": 478999 }, { "epoch": 5.32, "learning_rate": 4.328163265306122e-06, "loss": 2.1203, "step": 479000 }, { "FLOPS loss": 0.05217743292450905, "L0_d": 777.38, "MLM loss": 2.0618581771850586, "epoch": 5.32, "step": 479499 }, { "epoch": 5.32, "learning_rate": 4.226122448979592e-06, "loss": 2.1209, "step": 479500 }, { "FLOPS loss": 0.05445516109466553, "L0_d": 862.58, "MLM loss": 2.0027682781219482, "epoch": 5.33, "step": 479999 }, { "epoch": 5.33, "learning_rate": 4.1242857142857145e-06, "loss": 2.1187, "step": 480000 }, { "FLOPS loss": 0.06536096334457397, "L0_d": 989.39, "MLM loss": 1.971318006515503, "epoch": 5.33, "step": 480499 }, { "epoch": 5.33, "learning_rate": 4.022244897959184e-06, "loss": 2.1174, "step": 480500 }, { "FLOPS loss": 0.05409427359700203, "L0_d": 861.14, "MLM loss": 2.140533685684204, "epoch": 5.34, "step": 480999 }, { "epoch": 5.34, "learning_rate": 3.920204081632653e-06, "loss": 2.1167, "step": 481000 }, { "FLOPS loss": 0.056367021054029465, "L0_d": 883.81, "MLM loss": 2.0965638160705566, "epoch": 5.34, "step": 481499 }, { "epoch": 5.34, "learning_rate": 3.818163265306122e-06, "loss": 2.1218, "step": 481500 }, { "FLOPS loss": 0.05554311349987984, "L0_d": 969.8, "MLM loss": 2.0110745429992676, "epoch": 5.35, "step": 481999 }, { "epoch": 5.35, "learning_rate": 3.716326530612245e-06, "loss": 2.1223, "step": 482000 }, { "FLOPS loss": 0.05713539198040962, "L0_d": 1078.86, "MLM loss": 2.110353469848633, "epoch": 5.35, "step": 482499 }, { "epoch": 5.35, "learning_rate": 3.6142857142857143e-06, "loss": 2.1218, "step": 482500 }, { "FLOPS loss": 0.05139705166220665, "L0_d": 857.95, "MLM loss": 2.1457486152648926, "epoch": 5.36, "step": 482999 }, { "epoch": 5.36, "learning_rate": 3.512244897959184e-06, "loss": 2.1192, "step": 483000 }, { "FLOPS loss": 0.0548887699842453, "L0_d": 871.45, "MLM loss": 2.08449649810791, "epoch": 5.37, "step": 483499 }, { "epoch": 5.37, "learning_rate": 3.410204081632653e-06, "loss": 2.1169, "step": 483500 }, { "FLOPS loss": 0.04901834577322006, "L0_d": 902.42, "MLM loss": 2.1051578521728516, "epoch": 5.37, "step": 483999 }, { "epoch": 5.37, "learning_rate": 3.308571428571429e-06, "loss": 2.1149, "step": 484000 }, { "FLOPS loss": 0.04898621141910553, "L0_d": 811.02, "MLM loss": 2.0051395893096924, "epoch": 5.38, "step": 484499 }, { "epoch": 5.38, "learning_rate": 3.2065306122448978e-06, "loss": 2.1178, "step": 484500 }, { "FLOPS loss": 0.055309049785137177, "L0_d": 871.8, "MLM loss": 2.0303032398223877, "epoch": 5.38, "step": 484999 }, { "epoch": 5.38, "learning_rate": 3.104489795918367e-06, "loss": 2.1161, "step": 485000 }, { "FLOPS loss": 0.05627648904919624, "L0_d": 872.62, "MLM loss": 1.8871476650238037, "epoch": 5.39, "step": 485499 }, { "epoch": 5.39, "learning_rate": 3.002448979591837e-06, "loss": 2.1161, "step": 485500 }, { "FLOPS loss": 0.05859161168336868, "L0_d": 922.56, "MLM loss": 1.9770585298538208, "epoch": 5.39, "step": 485999 }, { "epoch": 5.39, "learning_rate": 2.9004081632653063e-06, "loss": 2.1213, "step": 486000 }, { "FLOPS loss": 0.05048004165291786, "L0_d": 892.64, "MLM loss": 1.9427074193954468, "epoch": 5.4, "step": 486499 }, { "epoch": 5.4, "learning_rate": 2.7983673469387756e-06, "loss": 2.1198, "step": 486500 }, { "FLOPS loss": 0.04645688831806183, "L0_d": 849.52, "MLM loss": 2.0516858100891113, "epoch": 5.4, "step": 486999 }, { "epoch": 5.4, "learning_rate": 2.6963265306122454e-06, "loss": 2.1193, "step": 487000 }, { "FLOPS loss": 0.05815611407160759, "L0_d": 933.14, "MLM loss": 2.0949809551239014, "epoch": 5.41, "step": 487499 }, { "epoch": 5.41, "learning_rate": 2.5942857142857143e-06, "loss": 2.1162, "step": 487500 }, { "FLOPS loss": 0.06016060709953308, "L0_d": 1001.53, "MLM loss": 2.1935060024261475, "epoch": 5.42, "step": 487999 }, { "epoch": 5.42, "learning_rate": 2.4924489795918367e-06, "loss": 2.1195, "step": 488000 }, { "FLOPS loss": 0.07078199088573456, "L0_d": 1064.05, "MLM loss": 2.0690457820892334, "epoch": 5.42, "step": 488499 }, { "epoch": 5.42, "learning_rate": 2.3904081632653065e-06, "loss": 2.1158, "step": 488500 }, { "FLOPS loss": 0.05600230395793915, "L0_d": 952.5, "MLM loss": 2.2216546535491943, "epoch": 5.43, "step": 488999 }, { "epoch": 5.43, "learning_rate": 2.2883673469387754e-06, "loss": 2.1195, "step": 489000 }, { "FLOPS loss": 0.05007050931453705, "L0_d": 973.44, "MLM loss": 1.965820074081421, "epoch": 5.43, "step": 489499 }, { "epoch": 5.43, "learning_rate": 2.186326530612245e-06, "loss": 2.1187, "step": 489500 }, { "FLOPS loss": 0.04523215815424919, "L0_d": 833.73, "MLM loss": 2.0820789337158203, "epoch": 5.44, "step": 489999 }, { "epoch": 5.44, "learning_rate": 2.084285714285714e-06, "loss": 2.1195, "step": 490000 }, { "FLOPS loss": 0.05340617522597313, "L0_d": 943.03, "MLM loss": 2.1225597858428955, "epoch": 5.44, "step": 490499 }, { "epoch": 5.44, "learning_rate": 1.982448979591837e-06, "loss": 2.1177, "step": 490500 }, { "FLOPS loss": 0.054495275020599365, "L0_d": 885.84, "MLM loss": 2.0798439979553223, "epoch": 5.45, "step": 490999 }, { "epoch": 5.45, "learning_rate": 1.8804081632653063e-06, "loss": 2.1175, "step": 491000 }, { "FLOPS loss": 0.059663232415914536, "L0_d": 960.39, "MLM loss": 2.1692159175872803, "epoch": 5.45, "step": 491499 }, { "epoch": 5.45, "learning_rate": 1.7783673469387754e-06, "loss": 2.1164, "step": 491500 }, { "FLOPS loss": 0.04664740338921547, "L0_d": 845.81, "MLM loss": 1.9624402523040771, "epoch": 5.46, "step": 491999 }, { "epoch": 5.46, "learning_rate": 1.676326530612245e-06, "loss": 2.1191, "step": 492000 }, { "FLOPS loss": 0.05143542215228081, "L0_d": 1207.7, "MLM loss": 2.0010082721710205, "epoch": 5.47, "step": 492499 }, { "epoch": 5.47, "learning_rate": 1.5744897959183676e-06, "loss": 2.1126, "step": 492500 }, { "FLOPS loss": 0.04587966203689575, "L0_d": 804.19, "MLM loss": 2.015944242477417, "epoch": 5.47, "step": 492999 }, { "epoch": 5.47, "learning_rate": 1.472448979591837e-06, "loss": 2.1166, "step": 493000 }, { "FLOPS loss": 0.04756327345967293, "L0_d": 845.25, "MLM loss": 2.0778701305389404, "epoch": 5.48, "step": 493499 }, { "epoch": 5.48, "learning_rate": 1.3704081632653063e-06, "loss": 2.115, "step": 493500 }, { "FLOPS loss": 0.05024033784866333, "L0_d": 845.3, "MLM loss": 2.270461320877075, "epoch": 5.48, "step": 493999 }, { "epoch": 5.48, "learning_rate": 1.2683673469387756e-06, "loss": 2.1146, "step": 494000 }, { "FLOPS loss": 0.051272064447402954, "L0_d": 910.12, "MLM loss": 1.9905471801757812, "epoch": 5.49, "step": 494499 }, { "epoch": 5.49, "learning_rate": 1.166326530612245e-06, "loss": 2.1163, "step": 494500 }, { "FLOPS loss": 0.049323007464408875, "L0_d": 826.39, "MLM loss": 2.0049796104431152, "epoch": 5.49, "step": 494999 }, { "epoch": 5.49, "learning_rate": 1.0644897959183674e-06, "loss": 2.1152, "step": 495000 }, { "FLOPS loss": 0.053387634456157684, "L0_d": 851.0, "MLM loss": 2.2483131885528564, "epoch": 5.5, "step": 495499 }, { "epoch": 5.5, "learning_rate": 9.624489795918367e-07, "loss": 2.1133, "step": 495500 }, { "FLOPS loss": 0.05112237483263016, "L0_d": 811.38, "MLM loss": 2.14658260345459, "epoch": 5.5, "step": 495999 }, { "epoch": 5.5, "learning_rate": 8.604081632653062e-07, "loss": 2.1174, "step": 496000 }, { "FLOPS loss": 0.05232582241296768, "L0_d": 957.78, "MLM loss": 2.015819787979126, "epoch": 5.51, "step": 496499 }, { "epoch": 5.51, "learning_rate": 7.583673469387756e-07, "loss": 2.1129, "step": 496500 }, { "FLOPS loss": 0.04806318134069443, "L0_d": 822.19, "MLM loss": 1.975080966949463, "epoch": 5.52, "step": 496999 }, { "epoch": 5.52, "learning_rate": 6.56530612244898e-07, "loss": 2.114, "step": 497000 }, { "FLOPS loss": 0.04816322401165962, "L0_d": 890.94, "MLM loss": 2.0904037952423096, "epoch": 5.52, "step": 497499 }, { "epoch": 5.52, "learning_rate": 5.544897959183674e-07, "loss": 2.1129, "step": 497500 }, { "FLOPS loss": 0.05622658506035805, "L0_d": 1019.56, "MLM loss": 2.130481243133545, "epoch": 5.53, "step": 497999 }, { "epoch": 5.53, "learning_rate": 4.5244897959183675e-07, "loss": 2.1138, "step": 498000 }, { "FLOPS loss": 0.051506780087947845, "L0_d": 1017.53, "MLM loss": 2.0182487964630127, "epoch": 5.53, "step": 498499 }, { "epoch": 5.53, "learning_rate": 3.504081632653061e-07, "loss": 2.1184, "step": 498500 }, { "FLOPS loss": 0.05676684528589249, "L0_d": 928.22, "MLM loss": 2.0784709453582764, "epoch": 5.54, "step": 498999 }, { "epoch": 5.54, "learning_rate": 2.4857142857142854e-07, "loss": 2.1149, "step": 499000 }, { "FLOPS loss": 0.05501260980963707, "L0_d": 1034.62, "MLM loss": 2.182220220565796, "epoch": 5.54, "step": 499499 }, { "epoch": 5.54, "learning_rate": 1.4653061224489795e-07, "loss": 2.1154, "step": 499500 }, { "FLOPS loss": 0.07184524834156036, "L0_d": 1145.64, "MLM loss": 2.1253085136413574, "epoch": 5.55, "step": 499999 }, { "epoch": 5.55, "learning_rate": 4.448979591836735e-08, "loss": 2.1152, "step": 500000 }, { "epoch": 5.55, "step": 500000, "total_flos": 8.483353196138332e+18, "train_loss": 2.416302919921875, "train_runtime": 186700.1564, "train_samples_per_second": 685.591, "train_steps_per_second": 2.678 } ], "max_steps": 500000, "num_train_epochs": 6, "total_flos": 8.483353196138332e+18, "trial_name": null, "trial_params": null }