{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 127150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 4.980338183248132e-05, "loss": 8.0518, "step": 500 }, { "epoch": 0.08, "learning_rate": 4.9606763664962646e-05, "loss": 3.358, "step": 1000 }, { "epoch": 0.12, "learning_rate": 4.9410145497443964e-05, "loss": 2.9014, "step": 1500 }, { "epoch": 0.16, "learning_rate": 4.921352732992528e-05, "loss": 2.6617, "step": 2000 }, { "epoch": 0.2, "learning_rate": 4.901690916240661e-05, "loss": 2.4945, "step": 2500 }, { "epoch": 0.24, "learning_rate": 4.882029099488793e-05, "loss": 2.3686, "step": 3000 }, { "epoch": 0.28, "learning_rate": 4.862367282736925e-05, "loss": 2.2674, "step": 3500 }, { "epoch": 0.31, "learning_rate": 4.8427054659850575e-05, "loss": 2.1889, "step": 4000 }, { "epoch": 0.35, "learning_rate": 4.8230436492331893e-05, "loss": 2.1107, "step": 4500 }, { "epoch": 0.39, "learning_rate": 4.803381832481321e-05, "loss": 2.0427, "step": 5000 }, { "epoch": 0.43, "learning_rate": 4.783720015729454e-05, "loss": 1.9907, "step": 5500 }, { "epoch": 0.47, "learning_rate": 4.7640581989775855e-05, "loss": 1.9401, "step": 6000 }, { "epoch": 0.51, "learning_rate": 4.744396382225718e-05, "loss": 1.9027, "step": 6500 }, { "epoch": 0.55, "learning_rate": 4.72473456547385e-05, "loss": 1.8655, "step": 7000 }, { "epoch": 0.59, "learning_rate": 4.705072748721982e-05, "loss": 1.823, "step": 7500 }, { "epoch": 0.63, "learning_rate": 4.685410931970115e-05, "loss": 1.7995, "step": 8000 }, { "epoch": 0.67, "learning_rate": 4.6657491152182466e-05, "loss": 1.7682, "step": 8500 }, { "epoch": 0.71, "learning_rate": 4.6460872984663785e-05, "loss": 1.7367, "step": 9000 }, { "epoch": 0.75, "learning_rate": 4.626425481714511e-05, "loss": 1.7111, "step": 9500 }, { "epoch": 0.79, "learning_rate": 4.606763664962643e-05, "loss": 1.6903, "step": 10000 }, { "epoch": 0.83, "learning_rate": 4.5871018482107746e-05, "loss": 1.6655, "step": 10500 }, { "epoch": 0.87, "learning_rate": 4.567440031458907e-05, "loss": 1.6468, "step": 11000 }, { "epoch": 0.9, "learning_rate": 4.547778214707039e-05, "loss": 1.6196, "step": 11500 }, { "epoch": 0.94, "learning_rate": 4.528116397955171e-05, "loss": 1.6017, "step": 12000 }, { "epoch": 0.98, "learning_rate": 4.508454581203303e-05, "loss": 1.5889, "step": 12500 }, { "epoch": 1.02, "learning_rate": 4.488792764451436e-05, "loss": 1.5679, "step": 13000 }, { "epoch": 1.06, "learning_rate": 4.4691309476995676e-05, "loss": 1.5419, "step": 13500 }, { "epoch": 1.1, "learning_rate": 4.4494691309477e-05, "loss": 1.5339, "step": 14000 }, { "epoch": 1.14, "learning_rate": 4.429807314195832e-05, "loss": 1.5189, "step": 14500 }, { "epoch": 1.18, "learning_rate": 4.410145497443964e-05, "loss": 1.5071, "step": 15000 }, { "epoch": 1.22, "learning_rate": 4.390483680692096e-05, "loss": 1.4936, "step": 15500 }, { "epoch": 1.26, "learning_rate": 4.370821863940228e-05, "loss": 1.4849, "step": 16000 }, { "epoch": 1.3, "learning_rate": 4.3511600471883605e-05, "loss": 1.4671, "step": 16500 }, { "epoch": 1.34, "learning_rate": 4.3314982304364923e-05, "loss": 1.4519, "step": 17000 }, { "epoch": 1.38, "learning_rate": 4.311836413684625e-05, "loss": 1.4411, "step": 17500 }, { "epoch": 1.42, "learning_rate": 4.2921745969327573e-05, "loss": 1.4355, "step": 18000 }, { "epoch": 1.45, "learning_rate": 4.272512780180889e-05, "loss": 1.4224, "step": 18500 }, { "epoch": 1.49, "learning_rate": 4.252850963429021e-05, "loss": 1.4221, "step": 19000 }, { "epoch": 1.53, "learning_rate": 4.2331891466771535e-05, "loss": 1.4053, "step": 19500 }, { "epoch": 1.57, "learning_rate": 4.213527329925285e-05, "loss": 1.394, "step": 20000 }, { "epoch": 1.61, "learning_rate": 4.193865513173417e-05, "loss": 1.3904, "step": 20500 }, { "epoch": 1.65, "learning_rate": 4.1742036964215496e-05, "loss": 1.377, "step": 21000 }, { "epoch": 1.69, "learning_rate": 4.1545418796696815e-05, "loss": 1.3735, "step": 21500 }, { "epoch": 1.73, "learning_rate": 4.134880062917813e-05, "loss": 1.364, "step": 22000 }, { "epoch": 1.77, "learning_rate": 4.115218246165946e-05, "loss": 1.3605, "step": 22500 }, { "epoch": 1.81, "learning_rate": 4.095556429414078e-05, "loss": 1.3539, "step": 23000 }, { "epoch": 1.85, "learning_rate": 4.07589461266221e-05, "loss": 1.3397, "step": 23500 }, { "epoch": 1.89, "learning_rate": 4.0562327959103426e-05, "loss": 1.3326, "step": 24000 }, { "epoch": 1.93, "learning_rate": 4.0365709791584744e-05, "loss": 1.3295, "step": 24500 }, { "epoch": 1.97, "learning_rate": 4.016909162406606e-05, "loss": 1.3297, "step": 25000 }, { "epoch": 2.01, "learning_rate": 3.997247345654739e-05, "loss": 1.312, "step": 25500 }, { "epoch": 2.04, "learning_rate": 3.9775855289028706e-05, "loss": 1.2981, "step": 26000 }, { "epoch": 2.08, "learning_rate": 3.957923712151003e-05, "loss": 1.2988, "step": 26500 }, { "epoch": 2.12, "learning_rate": 3.938261895399135e-05, "loss": 1.289, "step": 27000 }, { "epoch": 2.16, "learning_rate": 3.9186000786472674e-05, "loss": 1.2861, "step": 27500 }, { "epoch": 2.2, "learning_rate": 3.8989382618954e-05, "loss": 1.2823, "step": 28000 }, { "epoch": 2.24, "learning_rate": 3.879276445143532e-05, "loss": 1.2718, "step": 28500 }, { "epoch": 2.28, "learning_rate": 3.8596146283916635e-05, "loss": 1.2673, "step": 29000 }, { "epoch": 2.32, "learning_rate": 3.839952811639796e-05, "loss": 1.2672, "step": 29500 }, { "epoch": 2.36, "learning_rate": 3.820290994887928e-05, "loss": 1.2538, "step": 30000 }, { "epoch": 2.4, "learning_rate": 3.80062917813606e-05, "loss": 1.2533, "step": 30500 }, { "epoch": 2.44, "learning_rate": 3.780967361384192e-05, "loss": 1.2519, "step": 31000 }, { "epoch": 2.48, "learning_rate": 3.761305544632324e-05, "loss": 1.2437, "step": 31500 }, { "epoch": 2.52, "learning_rate": 3.741643727880456e-05, "loss": 1.2363, "step": 32000 }, { "epoch": 2.56, "learning_rate": 3.721981911128588e-05, "loss": 1.2397, "step": 32500 }, { "epoch": 2.6, "learning_rate": 3.702320094376721e-05, "loss": 1.2328, "step": 33000 }, { "epoch": 2.63, "learning_rate": 3.6826582776248526e-05, "loss": 1.2336, "step": 33500 }, { "epoch": 2.67, "learning_rate": 3.662996460872985e-05, "loss": 1.2271, "step": 34000 }, { "epoch": 2.71, "learning_rate": 3.643334644121117e-05, "loss": 1.2199, "step": 34500 }, { "epoch": 2.75, "learning_rate": 3.623672827369249e-05, "loss": 1.2167, "step": 35000 }, { "epoch": 2.79, "learning_rate": 3.604011010617381e-05, "loss": 1.2115, "step": 35500 }, { "epoch": 2.83, "learning_rate": 3.584349193865513e-05, "loss": 1.2095, "step": 36000 }, { "epoch": 2.87, "learning_rate": 3.564687377113645e-05, "loss": 1.2043, "step": 36500 }, { "epoch": 2.91, "learning_rate": 3.5450255603617774e-05, "loss": 1.2029, "step": 37000 }, { "epoch": 2.95, "learning_rate": 3.52536374360991e-05, "loss": 1.1952, "step": 37500 }, { "epoch": 2.99, "learning_rate": 3.5057019268580424e-05, "loss": 1.1916, "step": 38000 }, { "epoch": 3.03, "learning_rate": 3.486040110106174e-05, "loss": 1.183, "step": 38500 }, { "epoch": 3.07, "learning_rate": 3.466378293354306e-05, "loss": 1.1823, "step": 39000 }, { "epoch": 3.11, "learning_rate": 3.4467164766024386e-05, "loss": 1.1806, "step": 39500 }, { "epoch": 3.15, "learning_rate": 3.4270546598505704e-05, "loss": 1.182, "step": 40000 }, { "epoch": 3.19, "learning_rate": 3.407392843098702e-05, "loss": 1.1705, "step": 40500 }, { "epoch": 3.22, "learning_rate": 3.387731026346835e-05, "loss": 1.174, "step": 41000 }, { "epoch": 3.26, "learning_rate": 3.3680692095949665e-05, "loss": 1.1634, "step": 41500 }, { "epoch": 3.3, "learning_rate": 3.3484073928430984e-05, "loss": 1.1655, "step": 42000 }, { "epoch": 3.34, "learning_rate": 3.328745576091231e-05, "loss": 1.1622, "step": 42500 }, { "epoch": 3.38, "learning_rate": 3.3090837593393633e-05, "loss": 1.161, "step": 43000 }, { "epoch": 3.42, "learning_rate": 3.289421942587495e-05, "loss": 1.1495, "step": 43500 }, { "epoch": 3.46, "learning_rate": 3.269760125835628e-05, "loss": 1.1522, "step": 44000 }, { "epoch": 3.5, "learning_rate": 3.2500983090837595e-05, "loss": 1.1483, "step": 44500 }, { "epoch": 3.54, "learning_rate": 3.230436492331891e-05, "loss": 1.1516, "step": 45000 }, { "epoch": 3.58, "learning_rate": 3.210774675580024e-05, "loss": 1.1475, "step": 45500 }, { "epoch": 3.62, "learning_rate": 3.1911128588281556e-05, "loss": 1.1386, "step": 46000 }, { "epoch": 3.66, "learning_rate": 3.1714510420762875e-05, "loss": 1.142, "step": 46500 }, { "epoch": 3.7, "learning_rate": 3.15178922532442e-05, "loss": 1.1403, "step": 47000 }, { "epoch": 3.74, "learning_rate": 3.1321274085725525e-05, "loss": 1.1328, "step": 47500 }, { "epoch": 3.78, "learning_rate": 3.112465591820685e-05, "loss": 1.1257, "step": 48000 }, { "epoch": 3.81, "learning_rate": 3.092803775068817e-05, "loss": 1.1259, "step": 48500 }, { "epoch": 3.85, "learning_rate": 3.0731419583169486e-05, "loss": 1.128, "step": 49000 }, { "epoch": 3.89, "learning_rate": 3.053480141565081e-05, "loss": 1.1299, "step": 49500 }, { "epoch": 3.93, "learning_rate": 3.033818324813213e-05, "loss": 1.1285, "step": 50000 }, { "epoch": 3.97, "learning_rate": 3.0141565080613447e-05, "loss": 1.1224, "step": 50500 }, { "epoch": 4.01, "learning_rate": 2.9944946913094772e-05, "loss": 1.116, "step": 51000 }, { "epoch": 4.05, "learning_rate": 2.9748328745576094e-05, "loss": 1.1125, "step": 51500 }, { "epoch": 4.09, "learning_rate": 2.9551710578057412e-05, "loss": 1.1031, "step": 52000 }, { "epoch": 4.13, "learning_rate": 2.9355092410538737e-05, "loss": 1.1091, "step": 52500 }, { "epoch": 4.17, "learning_rate": 2.9158474243020055e-05, "loss": 1.1058, "step": 53000 }, { "epoch": 4.21, "learning_rate": 2.8961856075501377e-05, "loss": 1.1025, "step": 53500 }, { "epoch": 4.25, "learning_rate": 2.8765237907982702e-05, "loss": 1.1076, "step": 54000 }, { "epoch": 4.29, "learning_rate": 2.856861974046402e-05, "loss": 1.1052, "step": 54500 }, { "epoch": 4.33, "learning_rate": 2.837200157294534e-05, "loss": 1.0995, "step": 55000 }, { "epoch": 4.36, "learning_rate": 2.8175383405426664e-05, "loss": 1.0948, "step": 55500 }, { "epoch": 4.4, "learning_rate": 2.7978765237907985e-05, "loss": 1.0985, "step": 56000 }, { "epoch": 4.44, "learning_rate": 2.7782147070389303e-05, "loss": 1.0969, "step": 56500 }, { "epoch": 4.48, "learning_rate": 2.758552890287063e-05, "loss": 1.0921, "step": 57000 }, { "epoch": 4.52, "learning_rate": 2.7388910735351947e-05, "loss": 1.0913, "step": 57500 }, { "epoch": 4.56, "learning_rate": 2.719229256783327e-05, "loss": 1.0883, "step": 58000 }, { "epoch": 4.6, "learning_rate": 2.699567440031459e-05, "loss": 1.0859, "step": 58500 }, { "epoch": 4.64, "learning_rate": 2.679905623279591e-05, "loss": 1.0898, "step": 59000 }, { "epoch": 4.68, "learning_rate": 2.6602438065277236e-05, "loss": 1.0809, "step": 59500 }, { "epoch": 4.72, "learning_rate": 2.6405819897758555e-05, "loss": 1.084, "step": 60000 }, { "epoch": 4.76, "learning_rate": 2.6209201730239873e-05, "loss": 1.0767, "step": 60500 }, { "epoch": 4.8, "learning_rate": 2.6012583562721198e-05, "loss": 1.0709, "step": 61000 }, { "epoch": 4.84, "learning_rate": 2.581596539520252e-05, "loss": 1.0789, "step": 61500 }, { "epoch": 4.88, "learning_rate": 2.5619347227683838e-05, "loss": 1.0779, "step": 62000 }, { "epoch": 4.92, "learning_rate": 2.5422729060165163e-05, "loss": 1.0732, "step": 62500 }, { "epoch": 4.95, "learning_rate": 2.522611089264648e-05, "loss": 1.0718, "step": 63000 }, { "epoch": 4.99, "learning_rate": 2.5029492725127802e-05, "loss": 1.0665, "step": 63500 }, { "epoch": 5.03, "learning_rate": 2.4832874557609124e-05, "loss": 1.0673, "step": 64000 }, { "epoch": 5.07, "learning_rate": 2.4636256390090446e-05, "loss": 1.0603, "step": 64500 }, { "epoch": 5.11, "learning_rate": 2.4439638222571767e-05, "loss": 1.0557, "step": 65000 }, { "epoch": 5.15, "learning_rate": 2.4243020055053085e-05, "loss": 1.0563, "step": 65500 }, { "epoch": 5.19, "learning_rate": 2.404640188753441e-05, "loss": 1.0557, "step": 66000 }, { "epoch": 5.23, "learning_rate": 2.3849783720015732e-05, "loss": 1.0586, "step": 66500 }, { "epoch": 5.27, "learning_rate": 2.3653165552497054e-05, "loss": 1.0574, "step": 67000 }, { "epoch": 5.31, "learning_rate": 2.3456547384978372e-05, "loss": 1.0578, "step": 67500 }, { "epoch": 5.35, "learning_rate": 2.3259929217459694e-05, "loss": 1.0569, "step": 68000 }, { "epoch": 5.39, "learning_rate": 2.3063311049941015e-05, "loss": 1.0541, "step": 68500 }, { "epoch": 5.43, "learning_rate": 2.2866692882422337e-05, "loss": 1.0532, "step": 69000 }, { "epoch": 5.47, "learning_rate": 2.267007471490366e-05, "loss": 1.0538, "step": 69500 }, { "epoch": 5.51, "learning_rate": 2.247345654738498e-05, "loss": 1.0467, "step": 70000 }, { "epoch": 5.54, "learning_rate": 2.2276838379866298e-05, "loss": 1.0483, "step": 70500 }, { "epoch": 5.58, "learning_rate": 2.2080220212347623e-05, "loss": 1.0565, "step": 71000 }, { "epoch": 5.62, "learning_rate": 2.1883602044828945e-05, "loss": 1.0494, "step": 71500 }, { "epoch": 5.66, "learning_rate": 2.1686983877310266e-05, "loss": 1.0423, "step": 72000 }, { "epoch": 5.7, "learning_rate": 2.1490365709791585e-05, "loss": 1.0449, "step": 72500 }, { "epoch": 5.74, "learning_rate": 2.1293747542272906e-05, "loss": 1.0447, "step": 73000 }, { "epoch": 5.78, "learning_rate": 2.1097129374754228e-05, "loss": 1.0411, "step": 73500 }, { "epoch": 5.82, "learning_rate": 2.090051120723555e-05, "loss": 1.0424, "step": 74000 }, { "epoch": 5.86, "learning_rate": 2.070389303971687e-05, "loss": 1.0394, "step": 74500 }, { "epoch": 5.9, "learning_rate": 2.0507274872198193e-05, "loss": 1.035, "step": 75000 }, { "epoch": 5.94, "learning_rate": 2.031065670467951e-05, "loss": 1.0343, "step": 75500 }, { "epoch": 5.98, "learning_rate": 2.0114038537160836e-05, "loss": 1.0371, "step": 76000 }, { "epoch": 6.02, "learning_rate": 1.9917420369642157e-05, "loss": 1.0269, "step": 76500 }, { "epoch": 6.06, "learning_rate": 1.972080220212348e-05, "loss": 1.0361, "step": 77000 }, { "epoch": 6.1, "learning_rate": 1.9524184034604797e-05, "loss": 1.0291, "step": 77500 }, { "epoch": 6.13, "learning_rate": 1.932756586708612e-05, "loss": 1.0275, "step": 78000 }, { "epoch": 6.17, "learning_rate": 1.913094769956744e-05, "loss": 1.0308, "step": 78500 }, { "epoch": 6.21, "learning_rate": 1.8934329532048762e-05, "loss": 1.0259, "step": 79000 }, { "epoch": 6.25, "learning_rate": 1.8737711364530084e-05, "loss": 1.0238, "step": 79500 }, { "epoch": 6.29, "learning_rate": 1.8541093197011405e-05, "loss": 1.0263, "step": 80000 }, { "epoch": 6.33, "learning_rate": 1.8344475029492724e-05, "loss": 1.024, "step": 80500 }, { "epoch": 6.37, "learning_rate": 1.814785686197405e-05, "loss": 1.0217, "step": 81000 }, { "epoch": 6.41, "learning_rate": 1.795123869445537e-05, "loss": 1.0229, "step": 81500 }, { "epoch": 6.45, "learning_rate": 1.7754620526936692e-05, "loss": 1.0211, "step": 82000 }, { "epoch": 6.49, "learning_rate": 1.755800235941801e-05, "loss": 1.0143, "step": 82500 }, { "epoch": 6.53, "learning_rate": 1.736138419189933e-05, "loss": 1.0234, "step": 83000 }, { "epoch": 6.57, "learning_rate": 1.7164766024380653e-05, "loss": 1.0182, "step": 83500 }, { "epoch": 6.61, "learning_rate": 1.6968147856861975e-05, "loss": 1.0163, "step": 84000 }, { "epoch": 6.65, "learning_rate": 1.6771529689343296e-05, "loss": 1.0143, "step": 84500 }, { "epoch": 6.69, "learning_rate": 1.6574911521824618e-05, "loss": 1.0184, "step": 85000 }, { "epoch": 6.72, "learning_rate": 1.6378293354305936e-05, "loss": 1.0183, "step": 85500 }, { "epoch": 6.76, "learning_rate": 1.618167518678726e-05, "loss": 1.0174, "step": 86000 }, { "epoch": 6.8, "learning_rate": 1.5985057019268583e-05, "loss": 1.0151, "step": 86500 }, { "epoch": 6.84, "learning_rate": 1.5788438851749904e-05, "loss": 1.0116, "step": 87000 }, { "epoch": 6.88, "learning_rate": 1.5591820684231223e-05, "loss": 1.0114, "step": 87500 }, { "epoch": 6.92, "learning_rate": 1.5395202516712544e-05, "loss": 1.0144, "step": 88000 }, { "epoch": 6.96, "learning_rate": 1.5198584349193868e-05, "loss": 1.0106, "step": 88500 }, { "epoch": 7.0, "learning_rate": 1.5001966181675187e-05, "loss": 1.0196, "step": 89000 }, { "epoch": 7.04, "learning_rate": 1.4805348014156509e-05, "loss": 1.0041, "step": 89500 }, { "epoch": 7.08, "learning_rate": 1.460872984663783e-05, "loss": 1.0061, "step": 90000 }, { "epoch": 7.12, "learning_rate": 1.441211167911915e-05, "loss": 1.0013, "step": 90500 }, { "epoch": 7.16, "learning_rate": 1.4215493511600472e-05, "loss": 1.0085, "step": 91000 }, { "epoch": 7.2, "learning_rate": 1.4018875344081794e-05, "loss": 1.0028, "step": 91500 }, { "epoch": 7.24, "learning_rate": 1.3822257176563117e-05, "loss": 1.0028, "step": 92000 }, { "epoch": 7.27, "learning_rate": 1.3625639009044435e-05, "loss": 0.9988, "step": 92500 }, { "epoch": 7.31, "learning_rate": 1.3429020841525759e-05, "loss": 1.0067, "step": 93000 }, { "epoch": 7.35, "learning_rate": 1.323240267400708e-05, "loss": 1.0072, "step": 93500 }, { "epoch": 7.39, "learning_rate": 1.30357845064884e-05, "loss": 1.0001, "step": 94000 }, { "epoch": 7.43, "learning_rate": 1.2839166338969722e-05, "loss": 1.0012, "step": 94500 }, { "epoch": 7.47, "learning_rate": 1.2642548171451043e-05, "loss": 1.0003, "step": 95000 }, { "epoch": 7.51, "learning_rate": 1.2445930003932365e-05, "loss": 0.9877, "step": 95500 }, { "epoch": 7.55, "learning_rate": 1.2249311836413685e-05, "loss": 1.0001, "step": 96000 }, { "epoch": 7.59, "learning_rate": 1.2052693668895006e-05, "loss": 0.9954, "step": 96500 }, { "epoch": 7.63, "learning_rate": 1.1856075501376328e-05, "loss": 0.9963, "step": 97000 }, { "epoch": 7.67, "learning_rate": 1.1659457333857648e-05, "loss": 0.994, "step": 97500 }, { "epoch": 7.71, "learning_rate": 1.1462839166338971e-05, "loss": 1.003, "step": 98000 }, { "epoch": 7.75, "learning_rate": 1.1266220998820291e-05, "loss": 0.9931, "step": 98500 }, { "epoch": 7.79, "learning_rate": 1.1069602831301613e-05, "loss": 0.9983, "step": 99000 }, { "epoch": 7.83, "learning_rate": 1.0872984663782934e-05, "loss": 0.9922, "step": 99500 }, { "epoch": 7.86, "learning_rate": 1.0676366496264254e-05, "loss": 0.9968, "step": 100000 }, { "epoch": 7.9, "learning_rate": 1.0479748328745578e-05, "loss": 0.9956, "step": 100500 }, { "epoch": 7.94, "learning_rate": 1.0283130161226898e-05, "loss": 0.991, "step": 101000 }, { "epoch": 7.98, "learning_rate": 1.008651199370822e-05, "loss": 0.9915, "step": 101500 }, { "epoch": 8.02, "learning_rate": 9.88989382618954e-06, "loss": 0.9859, "step": 102000 }, { "epoch": 8.06, "learning_rate": 9.69327565867086e-06, "loss": 0.986, "step": 102500 }, { "epoch": 8.1, "learning_rate": 9.496657491152184e-06, "loss": 0.9937, "step": 103000 }, { "epoch": 8.14, "learning_rate": 9.300039323633504e-06, "loss": 0.9863, "step": 103500 }, { "epoch": 8.18, "learning_rate": 9.103421156114826e-06, "loss": 0.9836, "step": 104000 }, { "epoch": 8.22, "learning_rate": 8.906802988596147e-06, "loss": 0.9857, "step": 104500 }, { "epoch": 8.26, "learning_rate": 8.710184821077467e-06, "loss": 0.989, "step": 105000 }, { "epoch": 8.3, "learning_rate": 8.51356665355879e-06, "loss": 0.9886, "step": 105500 }, { "epoch": 8.34, "learning_rate": 8.31694848604011e-06, "loss": 0.9885, "step": 106000 }, { "epoch": 8.38, "learning_rate": 8.120330318521432e-06, "loss": 0.9847, "step": 106500 }, { "epoch": 8.42, "learning_rate": 7.923712151002753e-06, "loss": 0.9846, "step": 107000 }, { "epoch": 8.45, "learning_rate": 7.727093983484073e-06, "loss": 0.9882, "step": 107500 }, { "epoch": 8.49, "learning_rate": 7.530475815965396e-06, "loss": 0.9866, "step": 108000 }, { "epoch": 8.53, "learning_rate": 7.333857648446717e-06, "loss": 0.9823, "step": 108500 }, { "epoch": 8.57, "learning_rate": 7.137239480928037e-06, "loss": 0.9841, "step": 109000 }, { "epoch": 8.61, "learning_rate": 6.94062131340936e-06, "loss": 0.9844, "step": 109500 }, { "epoch": 8.65, "learning_rate": 6.7440031458906806e-06, "loss": 0.9817, "step": 110000 }, { "epoch": 8.69, "learning_rate": 6.547384978372002e-06, "loss": 0.9854, "step": 110500 }, { "epoch": 8.73, "learning_rate": 6.350766810853323e-06, "loss": 0.9845, "step": 111000 }, { "epoch": 8.77, "learning_rate": 6.1541486433346445e-06, "loss": 0.984, "step": 111500 }, { "epoch": 8.81, "learning_rate": 5.957530475815966e-06, "loss": 0.9865, "step": 112000 }, { "epoch": 8.85, "learning_rate": 5.760912308297287e-06, "loss": 0.9765, "step": 112500 }, { "epoch": 8.89, "learning_rate": 5.564294140778608e-06, "loss": 0.9867, "step": 113000 }, { "epoch": 8.93, "learning_rate": 5.367675973259929e-06, "loss": 0.986, "step": 113500 }, { "epoch": 8.97, "learning_rate": 5.171057805741251e-06, "loss": 0.9817, "step": 114000 }, { "epoch": 9.01, "learning_rate": 4.9744396382225725e-06, "loss": 0.9774, "step": 114500 }, { "epoch": 9.04, "learning_rate": 4.777821470703893e-06, "loss": 0.9811, "step": 115000 }, { "epoch": 9.08, "learning_rate": 4.581203303185214e-06, "loss": 0.9759, "step": 115500 }, { "epoch": 9.12, "learning_rate": 4.384585135666536e-06, "loss": 0.9769, "step": 116000 }, { "epoch": 9.16, "learning_rate": 4.187966968147857e-06, "loss": 0.9807, "step": 116500 }, { "epoch": 9.2, "learning_rate": 3.991348800629179e-06, "loss": 0.9822, "step": 117000 }, { "epoch": 9.24, "learning_rate": 3.794730633110499e-06, "loss": 0.9843, "step": 117500 }, { "epoch": 9.28, "learning_rate": 3.5981124655918208e-06, "loss": 0.9777, "step": 118000 }, { "epoch": 9.32, "learning_rate": 3.401494298073142e-06, "loss": 0.977, "step": 118500 }, { "epoch": 9.36, "learning_rate": 3.2048761305544636e-06, "loss": 0.9785, "step": 119000 }, { "epoch": 9.4, "learning_rate": 3.0082579630357847e-06, "loss": 0.9756, "step": 119500 }, { "epoch": 9.44, "learning_rate": 2.811639795517106e-06, "loss": 0.9803, "step": 120000 }, { "epoch": 9.48, "learning_rate": 2.615021627998427e-06, "loss": 0.9774, "step": 120500 }, { "epoch": 9.52, "learning_rate": 2.4184034604797483e-06, "loss": 0.984, "step": 121000 }, { "epoch": 9.56, "learning_rate": 2.22178529296107e-06, "loss": 0.9753, "step": 121500 }, { "epoch": 9.59, "learning_rate": 2.025167125442391e-06, "loss": 0.9764, "step": 122000 }, { "epoch": 9.63, "learning_rate": 1.8285489579237123e-06, "loss": 0.9756, "step": 122500 }, { "epoch": 9.67, "learning_rate": 1.6319307904050335e-06, "loss": 0.9733, "step": 123000 }, { "epoch": 9.71, "learning_rate": 1.4353126228863549e-06, "loss": 0.9726, "step": 123500 }, { "epoch": 9.75, "learning_rate": 1.238694455367676e-06, "loss": 0.9783, "step": 124000 }, { "epoch": 9.79, "learning_rate": 1.0420762878489972e-06, "loss": 0.9786, "step": 124500 }, { "epoch": 9.83, "learning_rate": 8.454581203303186e-07, "loss": 0.9802, "step": 125000 }, { "epoch": 9.87, "learning_rate": 6.488399528116398e-07, "loss": 0.979, "step": 125500 }, { "epoch": 9.91, "learning_rate": 4.522217852929611e-07, "loss": 0.978, "step": 126000 }, { "epoch": 9.95, "learning_rate": 2.556036177742823e-07, "loss": 0.9718, "step": 126500 }, { "epoch": 9.99, "learning_rate": 5.898545025560362e-08, "loss": 0.9811, "step": 127000 }, { "epoch": 10.0, "step": 127150, "total_flos": 5.807966088205763e+17, "train_loss": 1.219492632603017, "train_runtime": 160361.9384, "train_samples_per_second": 38.059, "train_steps_per_second": 0.793 } ], "max_steps": 127150, "num_train_epochs": 10, "total_flos": 5.807966088205763e+17, "trial_name": null, "trial_params": null }