{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 94323, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 4.973495329877125e-05, "loss": 1.7869, "step": 500 }, { "epoch": 0.03, "learning_rate": 4.9469906597542485e-05, "loss": 1.7591, "step": 1000 }, { "epoch": 0.05, "learning_rate": 4.920485989631373e-05, "loss": 1.7192, "step": 1500 }, { "epoch": 0.06, "learning_rate": 4.893981319508498e-05, "loss": 1.6754, "step": 2000 }, { "epoch": 0.08, "learning_rate": 4.867476649385622e-05, "loss": 1.6344, "step": 2500 }, { "epoch": 0.1, "learning_rate": 4.840971979262746e-05, "loss": 1.6237, "step": 3000 }, { "epoch": 0.11, "learning_rate": 4.8144673091398703e-05, "loss": 1.5965, "step": 3500 }, { "epoch": 0.13, "learning_rate": 4.787962639016995e-05, "loss": 1.5753, "step": 4000 }, { "epoch": 0.14, "learning_rate": 4.761457968894119e-05, "loss": 1.5598, "step": 4500 }, { "epoch": 0.16, "learning_rate": 4.734953298771243e-05, "loss": 1.5247, "step": 5000 }, { "epoch": 0.17, "learning_rate": 4.708448628648368e-05, "loss": 1.5149, "step": 5500 }, { "epoch": 0.19, "learning_rate": 4.681943958525493e-05, "loss": 1.5089, "step": 6000 }, { "epoch": 0.21, "learning_rate": 4.655439288402616e-05, "loss": 1.4888, "step": 6500 }, { "epoch": 0.22, "learning_rate": 4.628934618279741e-05, "loss": 1.4738, "step": 7000 }, { "epoch": 0.24, "learning_rate": 4.602429948156866e-05, "loss": 1.4535, "step": 7500 }, { "epoch": 0.25, "learning_rate": 4.57592527803399e-05, "loss": 1.474, "step": 8000 }, { "epoch": 0.27, "learning_rate": 4.549420607911114e-05, "loss": 1.4491, "step": 8500 }, { "epoch": 0.29, "learning_rate": 4.522915937788238e-05, "loss": 1.4336, "step": 9000 }, { "epoch": 0.3, "learning_rate": 4.496411267665363e-05, "loss": 1.4323, "step": 9500 }, { "epoch": 0.32, "learning_rate": 4.469906597542487e-05, "loss": 1.4189, "step": 10000 }, { "epoch": 0.33, "learning_rate": 4.443401927419611e-05, "loss": 1.4101, "step": 10500 }, { "epoch": 0.35, "learning_rate": 4.416897257296736e-05, "loss": 1.3992, "step": 11000 }, { "epoch": 0.37, "learning_rate": 4.390392587173861e-05, "loss": 1.3975, "step": 11500 }, { "epoch": 0.38, "learning_rate": 4.363887917050984e-05, "loss": 1.3845, "step": 12000 }, { "epoch": 0.4, "learning_rate": 4.337383246928109e-05, "loss": 1.3795, "step": 12500 }, { "epoch": 0.41, "learning_rate": 4.310878576805233e-05, "loss": 1.3867, "step": 13000 }, { "epoch": 0.43, "learning_rate": 4.284373906682357e-05, "loss": 1.3647, "step": 13500 }, { "epoch": 0.45, "learning_rate": 4.257869236559482e-05, "loss": 1.3638, "step": 14000 }, { "epoch": 0.46, "learning_rate": 4.231364566436606e-05, "loss": 1.3507, "step": 14500 }, { "epoch": 0.48, "learning_rate": 4.204859896313731e-05, "loss": 1.3462, "step": 15000 }, { "epoch": 0.49, "learning_rate": 4.178355226190855e-05, "loss": 1.3511, "step": 15500 }, { "epoch": 0.51, "learning_rate": 4.151850556067979e-05, "loss": 1.3384, "step": 16000 }, { "epoch": 0.52, "learning_rate": 4.125345885945104e-05, "loss": 1.319, "step": 16500 }, { "epoch": 0.54, "learning_rate": 4.098841215822228e-05, "loss": 1.3331, "step": 17000 }, { "epoch": 0.56, "learning_rate": 4.072336545699352e-05, "loss": 1.3119, "step": 17500 }, { "epoch": 0.57, "learning_rate": 4.045831875576477e-05, "loss": 1.32, "step": 18000 }, { "epoch": 0.59, "learning_rate": 4.019327205453601e-05, "loss": 1.3224, "step": 18500 }, { "epoch": 0.6, "learning_rate": 3.992822535330725e-05, "loss": 1.2868, "step": 19000 }, { "epoch": 0.62, "learning_rate": 3.96631786520785e-05, "loss": 1.2955, "step": 19500 }, { "epoch": 0.64, "learning_rate": 3.939813195084974e-05, "loss": 1.2932, "step": 20000 }, { "epoch": 0.65, "learning_rate": 3.913308524962099e-05, "loss": 1.277, "step": 20500 }, { "epoch": 0.67, "learning_rate": 3.886803854839223e-05, "loss": 1.2759, "step": 21000 }, { "epoch": 0.68, "learning_rate": 3.860299184716347e-05, "loss": 1.2793, "step": 21500 }, { "epoch": 0.7, "learning_rate": 3.833794514593472e-05, "loss": 1.2614, "step": 22000 }, { "epoch": 0.72, "learning_rate": 3.807289844470596e-05, "loss": 1.2768, "step": 22500 }, { "epoch": 0.73, "learning_rate": 3.78078517434772e-05, "loss": 1.2615, "step": 23000 }, { "epoch": 0.75, "learning_rate": 3.754280504224845e-05, "loss": 1.2525, "step": 23500 }, { "epoch": 0.76, "learning_rate": 3.727775834101969e-05, "loss": 1.2593, "step": 24000 }, { "epoch": 0.78, "learning_rate": 3.701271163979093e-05, "loss": 1.2435, "step": 24500 }, { "epoch": 0.8, "learning_rate": 3.674766493856218e-05, "loss": 1.241, "step": 25000 }, { "epoch": 0.81, "learning_rate": 3.648261823733342e-05, "loss": 1.2449, "step": 25500 }, { "epoch": 0.83, "learning_rate": 3.621757153610466e-05, "loss": 1.2229, "step": 26000 }, { "epoch": 0.84, "learning_rate": 3.595252483487591e-05, "loss": 1.2296, "step": 26500 }, { "epoch": 0.86, "learning_rate": 3.568747813364715e-05, "loss": 1.2257, "step": 27000 }, { "epoch": 0.87, "learning_rate": 3.5422431432418397e-05, "loss": 1.2292, "step": 27500 }, { "epoch": 0.89, "learning_rate": 3.515738473118964e-05, "loss": 1.2351, "step": 28000 }, { "epoch": 0.91, "learning_rate": 3.489233802996088e-05, "loss": 1.2179, "step": 28500 }, { "epoch": 0.92, "learning_rate": 3.4627291328732127e-05, "loss": 1.2179, "step": 29000 }, { "epoch": 0.94, "learning_rate": 3.436224462750337e-05, "loss": 1.2057, "step": 29500 }, { "epoch": 0.95, "learning_rate": 3.409719792627461e-05, "loss": 1.2081, "step": 30000 }, { "epoch": 0.97, "learning_rate": 3.3832151225045856e-05, "loss": 1.2069, "step": 30500 }, { "epoch": 0.99, "learning_rate": 3.35671045238171e-05, "loss": 1.1903, "step": 31000 }, { "epoch": 1.0, "learning_rate": 3.330205782258834e-05, "loss": 1.1958, "step": 31500 }, { "epoch": 1.02, "learning_rate": 3.3037011121359586e-05, "loss": 1.1746, "step": 32000 }, { "epoch": 1.03, "learning_rate": 3.277196442013083e-05, "loss": 1.1648, "step": 32500 }, { "epoch": 1.05, "learning_rate": 3.2506917718902075e-05, "loss": 1.1615, "step": 33000 }, { "epoch": 1.07, "learning_rate": 3.2241871017673316e-05, "loss": 1.1732, "step": 33500 }, { "epoch": 1.08, "learning_rate": 3.197682431644456e-05, "loss": 1.1664, "step": 34000 }, { "epoch": 1.1, "learning_rate": 3.1711777615215805e-05, "loss": 1.1639, "step": 34500 }, { "epoch": 1.11, "learning_rate": 3.1446730913987046e-05, "loss": 1.1507, "step": 35000 }, { "epoch": 1.13, "learning_rate": 3.118168421275829e-05, "loss": 1.1577, "step": 35500 }, { "epoch": 1.15, "learning_rate": 3.0916637511529535e-05, "loss": 1.1563, "step": 36000 }, { "epoch": 1.16, "learning_rate": 3.0651590810300776e-05, "loss": 1.1528, "step": 36500 }, { "epoch": 1.18, "learning_rate": 3.0386544109072017e-05, "loss": 1.1461, "step": 37000 }, { "epoch": 1.19, "learning_rate": 3.0121497407843262e-05, "loss": 1.1574, "step": 37500 }, { "epoch": 1.21, "learning_rate": 2.985645070661451e-05, "loss": 1.1503, "step": 38000 }, { "epoch": 1.22, "learning_rate": 2.9591404005385747e-05, "loss": 1.1522, "step": 38500 }, { "epoch": 1.24, "learning_rate": 2.9326357304156992e-05, "loss": 1.1451, "step": 39000 }, { "epoch": 1.26, "learning_rate": 2.9061310602928236e-05, "loss": 1.1335, "step": 39500 }, { "epoch": 1.27, "learning_rate": 2.8796263901699484e-05, "loss": 1.1383, "step": 40000 }, { "epoch": 1.29, "learning_rate": 2.8531217200470722e-05, "loss": 1.1363, "step": 40500 }, { "epoch": 1.3, "learning_rate": 2.8266170499241966e-05, "loss": 1.1277, "step": 41000 }, { "epoch": 1.32, "learning_rate": 2.800112379801321e-05, "loss": 1.1332, "step": 41500 }, { "epoch": 1.34, "learning_rate": 2.773607709678446e-05, "loss": 1.1249, "step": 42000 }, { "epoch": 1.35, "learning_rate": 2.7471030395555696e-05, "loss": 1.1283, "step": 42500 }, { "epoch": 1.37, "learning_rate": 2.720598369432694e-05, "loss": 1.1177, "step": 43000 }, { "epoch": 1.38, "learning_rate": 2.694093699309819e-05, "loss": 1.1023, "step": 43500 }, { "epoch": 1.4, "learning_rate": 2.6675890291869426e-05, "loss": 1.1245, "step": 44000 }, { "epoch": 1.42, "learning_rate": 2.641084359064067e-05, "loss": 1.1194, "step": 44500 }, { "epoch": 1.43, "learning_rate": 2.6145796889411915e-05, "loss": 1.1132, "step": 45000 }, { "epoch": 1.45, "learning_rate": 2.5880750188183163e-05, "loss": 1.1073, "step": 45500 }, { "epoch": 1.46, "learning_rate": 2.56157034869544e-05, "loss": 1.0974, "step": 46000 }, { "epoch": 1.48, "learning_rate": 2.5350656785725645e-05, "loss": 1.1003, "step": 46500 }, { "epoch": 1.49, "learning_rate": 2.508561008449689e-05, "loss": 1.0966, "step": 47000 }, { "epoch": 1.51, "learning_rate": 2.4820563383268134e-05, "loss": 1.0928, "step": 47500 }, { "epoch": 1.53, "learning_rate": 2.4555516682039375e-05, "loss": 1.0868, "step": 48000 }, { "epoch": 1.54, "learning_rate": 2.429046998081062e-05, "loss": 1.0902, "step": 48500 }, { "epoch": 1.56, "learning_rate": 2.4025423279581864e-05, "loss": 1.0978, "step": 49000 }, { "epoch": 1.57, "learning_rate": 2.376037657835311e-05, "loss": 1.0828, "step": 49500 }, { "epoch": 1.59, "learning_rate": 2.349532987712435e-05, "loss": 1.1016, "step": 50000 }, { "epoch": 1.61, "learning_rate": 2.3230283175895594e-05, "loss": 1.0892, "step": 50500 }, { "epoch": 1.62, "learning_rate": 2.296523647466684e-05, "loss": 1.0832, "step": 51000 }, { "epoch": 1.64, "learning_rate": 2.270018977343808e-05, "loss": 1.0668, "step": 51500 }, { "epoch": 1.65, "learning_rate": 2.2435143072209324e-05, "loss": 1.0756, "step": 52000 }, { "epoch": 1.67, "learning_rate": 2.217009637098057e-05, "loss": 1.0714, "step": 52500 }, { "epoch": 1.69, "learning_rate": 2.1905049669751813e-05, "loss": 1.0592, "step": 53000 }, { "epoch": 1.7, "learning_rate": 2.1640002968523054e-05, "loss": 1.07, "step": 53500 }, { "epoch": 1.72, "learning_rate": 2.13749562672943e-05, "loss": 1.0548, "step": 54000 }, { "epoch": 1.73, "learning_rate": 2.1109909566065543e-05, "loss": 1.0589, "step": 54500 }, { "epoch": 1.75, "learning_rate": 2.0844862864836784e-05, "loss": 1.0662, "step": 55000 }, { "epoch": 1.77, "learning_rate": 2.057981616360803e-05, "loss": 1.0678, "step": 55500 }, { "epoch": 1.78, "learning_rate": 2.0314769462379273e-05, "loss": 1.0545, "step": 56000 }, { "epoch": 1.8, "learning_rate": 2.0049722761150517e-05, "loss": 1.0486, "step": 56500 }, { "epoch": 1.81, "learning_rate": 1.978467605992176e-05, "loss": 1.051, "step": 57000 }, { "epoch": 1.83, "learning_rate": 1.9519629358693003e-05, "loss": 1.0389, "step": 57500 }, { "epoch": 1.84, "learning_rate": 1.9254582657464247e-05, "loss": 1.0445, "step": 58000 }, { "epoch": 1.86, "learning_rate": 1.8989535956235492e-05, "loss": 1.0511, "step": 58500 }, { "epoch": 1.88, "learning_rate": 1.8724489255006733e-05, "loss": 1.0384, "step": 59000 }, { "epoch": 1.89, "learning_rate": 1.8459442553777974e-05, "loss": 1.0386, "step": 59500 }, { "epoch": 1.91, "learning_rate": 1.819439585254922e-05, "loss": 1.0321, "step": 60000 }, { "epoch": 1.92, "learning_rate": 1.7929349151320463e-05, "loss": 1.0424, "step": 60500 }, { "epoch": 1.94, "learning_rate": 1.7664302450091707e-05, "loss": 1.0421, "step": 61000 }, { "epoch": 1.96, "learning_rate": 1.7399255748862948e-05, "loss": 1.0334, "step": 61500 }, { "epoch": 1.97, "learning_rate": 1.7134209047634196e-05, "loss": 1.0385, "step": 62000 }, { "epoch": 1.99, "learning_rate": 1.6869162346405437e-05, "loss": 1.0265, "step": 62500 }, { "epoch": 2.0, "learning_rate": 1.660411564517668e-05, "loss": 1.0245, "step": 63000 }, { "epoch": 2.02, "learning_rate": 1.6339068943947926e-05, "loss": 1.0146, "step": 63500 }, { "epoch": 2.04, "learning_rate": 1.6074022242719167e-05, "loss": 1.0161, "step": 64000 }, { "epoch": 2.05, "learning_rate": 1.580897554149041e-05, "loss": 1.0196, "step": 64500 }, { "epoch": 2.07, "learning_rate": 1.5543928840261653e-05, "loss": 1.0118, "step": 65000 }, { "epoch": 2.08, "learning_rate": 1.52788821390329e-05, "loss": 0.9996, "step": 65500 }, { "epoch": 2.1, "learning_rate": 1.5013835437804142e-05, "loss": 1.0026, "step": 66000 }, { "epoch": 2.12, "learning_rate": 1.4748788736575386e-05, "loss": 1.0156, "step": 66500 }, { "epoch": 2.13, "learning_rate": 1.4483742035346629e-05, "loss": 1.0017, "step": 67000 }, { "epoch": 2.15, "learning_rate": 1.4218695334117873e-05, "loss": 1.0129, "step": 67500 }, { "epoch": 2.16, "learning_rate": 1.3953648632889116e-05, "loss": 1.0093, "step": 68000 }, { "epoch": 2.18, "learning_rate": 1.3688601931660359e-05, "loss": 0.9939, "step": 68500 }, { "epoch": 2.19, "learning_rate": 1.3423555230431603e-05, "loss": 0.9891, "step": 69000 }, { "epoch": 2.21, "learning_rate": 1.3158508529202846e-05, "loss": 0.9978, "step": 69500 }, { "epoch": 2.23, "learning_rate": 1.289346182797409e-05, "loss": 1.0043, "step": 70000 }, { "epoch": 2.24, "learning_rate": 1.2628415126745333e-05, "loss": 0.9913, "step": 70500 }, { "epoch": 2.26, "learning_rate": 1.2363368425516576e-05, "loss": 0.9986, "step": 71000 }, { "epoch": 2.27, "learning_rate": 1.209832172428782e-05, "loss": 0.9892, "step": 71500 }, { "epoch": 2.29, "learning_rate": 1.1833275023059063e-05, "loss": 0.9995, "step": 72000 }, { "epoch": 2.31, "learning_rate": 1.1568228321830308e-05, "loss": 0.9944, "step": 72500 }, { "epoch": 2.32, "learning_rate": 1.130318162060155e-05, "loss": 0.998, "step": 73000 }, { "epoch": 2.34, "learning_rate": 1.1038134919372795e-05, "loss": 0.9773, "step": 73500 }, { "epoch": 2.35, "learning_rate": 1.0773088218144038e-05, "loss": 0.9928, "step": 74000 }, { "epoch": 2.37, "learning_rate": 1.050804151691528e-05, "loss": 0.9787, "step": 74500 }, { "epoch": 2.39, "learning_rate": 1.0242994815686525e-05, "loss": 0.978, "step": 75000 }, { "epoch": 2.4, "learning_rate": 9.977948114457768e-06, "loss": 0.9757, "step": 75500 }, { "epoch": 2.42, "learning_rate": 9.712901413229012e-06, "loss": 0.9773, "step": 76000 }, { "epoch": 2.43, "learning_rate": 9.447854712000255e-06, "loss": 0.9747, "step": 76500 }, { "epoch": 2.45, "learning_rate": 9.1828080107715e-06, "loss": 0.9703, "step": 77000 }, { "epoch": 2.46, "learning_rate": 8.917761309542742e-06, "loss": 0.9631, "step": 77500 }, { "epoch": 2.48, "learning_rate": 8.652714608313986e-06, "loss": 0.9754, "step": 78000 }, { "epoch": 2.5, "learning_rate": 8.387667907085228e-06, "loss": 0.9834, "step": 78500 }, { "epoch": 2.51, "learning_rate": 8.122621205856472e-06, "loss": 0.9797, "step": 79000 }, { "epoch": 2.53, "learning_rate": 7.857574504627715e-06, "loss": 0.9761, "step": 79500 }, { "epoch": 2.54, "learning_rate": 7.592527803398959e-06, "loss": 0.9689, "step": 80000 }, { "epoch": 2.56, "learning_rate": 7.327481102170203e-06, "loss": 0.9683, "step": 80500 }, { "epoch": 2.58, "learning_rate": 7.062434400941446e-06, "loss": 0.964, "step": 81000 }, { "epoch": 2.59, "learning_rate": 6.79738769971269e-06, "loss": 0.968, "step": 81500 }, { "epoch": 2.61, "learning_rate": 6.532340998483934e-06, "loss": 0.9638, "step": 82000 }, { "epoch": 2.62, "learning_rate": 6.267294297255176e-06, "loss": 0.9651, "step": 82500 }, { "epoch": 2.64, "learning_rate": 6.00224759602642e-06, "loss": 0.9616, "step": 83000 }, { "epoch": 2.66, "learning_rate": 5.737200894797664e-06, "loss": 0.9586, "step": 83500 }, { "epoch": 2.67, "learning_rate": 5.472154193568907e-06, "loss": 0.965, "step": 84000 }, { "epoch": 2.69, "learning_rate": 5.207107492340151e-06, "loss": 0.9616, "step": 84500 }, { "epoch": 2.7, "learning_rate": 4.942060791111394e-06, "loss": 0.961, "step": 85000 }, { "epoch": 2.72, "learning_rate": 4.677014089882638e-06, "loss": 0.9471, "step": 85500 }, { "epoch": 2.74, "learning_rate": 4.411967388653881e-06, "loss": 0.9676, "step": 86000 }, { "epoch": 2.75, "learning_rate": 4.146920687425124e-06, "loss": 0.9451, "step": 86500 }, { "epoch": 2.77, "learning_rate": 3.881873986196368e-06, "loss": 0.9501, "step": 87000 }, { "epoch": 2.78, "learning_rate": 3.6168272849676116e-06, "loss": 0.949, "step": 87500 }, { "epoch": 2.8, "learning_rate": 3.3517805837388548e-06, "loss": 0.9511, "step": 88000 }, { "epoch": 2.81, "learning_rate": 3.0867338825100984e-06, "loss": 0.9452, "step": 88500 }, { "epoch": 2.83, "learning_rate": 2.821687181281342e-06, "loss": 0.9458, "step": 89000 }, { "epoch": 2.85, "learning_rate": 2.556640480052585e-06, "loss": 0.9519, "step": 89500 }, { "epoch": 2.86, "learning_rate": 2.2915937788238288e-06, "loss": 0.9391, "step": 90000 }, { "epoch": 2.88, "learning_rate": 2.0265470775950724e-06, "loss": 0.9483, "step": 90500 }, { "epoch": 2.89, "learning_rate": 1.7615003763663158e-06, "loss": 0.9483, "step": 91000 }, { "epoch": 2.91, "learning_rate": 1.4964536751375594e-06, "loss": 0.9454, "step": 91500 }, { "epoch": 2.93, "learning_rate": 1.2314069739088028e-06, "loss": 0.9371, "step": 92000 }, { "epoch": 2.94, "learning_rate": 9.663602726800462e-07, "loss": 0.9447, "step": 92500 }, { "epoch": 2.96, "learning_rate": 7.013135714512897e-07, "loss": 0.9461, "step": 93000 }, { "epoch": 2.97, "learning_rate": 4.362668702225332e-07, "loss": 0.9397, "step": 93500 }, { "epoch": 2.99, "learning_rate": 1.7122016899377671e-07, "loss": 0.9397, "step": 94000 }, { "epoch": 3.0, "step": 94323, "total_flos": 8.797388561511875e+17, "train_loss": 1.148913297653906, "train_runtime": 104744.2409, "train_samples_per_second": 9.005, "train_steps_per_second": 0.901 } ], "max_steps": 94323, "num_train_epochs": 3, "total_flos": 8.797388561511875e+17, "trial_name": null, "trial_params": null }