Davlan's picture
Upload trainer_state.json
b2c511e
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"global_step": 94323,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 4.973495329877125e-05,
"loss": 1.7869,
"step": 500
},
{
"epoch": 0.03,
"learning_rate": 4.9469906597542485e-05,
"loss": 1.7591,
"step": 1000
},
{
"epoch": 0.05,
"learning_rate": 4.920485989631373e-05,
"loss": 1.7192,
"step": 1500
},
{
"epoch": 0.06,
"learning_rate": 4.893981319508498e-05,
"loss": 1.6754,
"step": 2000
},
{
"epoch": 0.08,
"learning_rate": 4.867476649385622e-05,
"loss": 1.6344,
"step": 2500
},
{
"epoch": 0.1,
"learning_rate": 4.840971979262746e-05,
"loss": 1.6237,
"step": 3000
},
{
"epoch": 0.11,
"learning_rate": 4.8144673091398703e-05,
"loss": 1.5965,
"step": 3500
},
{
"epoch": 0.13,
"learning_rate": 4.787962639016995e-05,
"loss": 1.5753,
"step": 4000
},
{
"epoch": 0.14,
"learning_rate": 4.761457968894119e-05,
"loss": 1.5598,
"step": 4500
},
{
"epoch": 0.16,
"learning_rate": 4.734953298771243e-05,
"loss": 1.5247,
"step": 5000
},
{
"epoch": 0.17,
"learning_rate": 4.708448628648368e-05,
"loss": 1.5149,
"step": 5500
},
{
"epoch": 0.19,
"learning_rate": 4.681943958525493e-05,
"loss": 1.5089,
"step": 6000
},
{
"epoch": 0.21,
"learning_rate": 4.655439288402616e-05,
"loss": 1.4888,
"step": 6500
},
{
"epoch": 0.22,
"learning_rate": 4.628934618279741e-05,
"loss": 1.4738,
"step": 7000
},
{
"epoch": 0.24,
"learning_rate": 4.602429948156866e-05,
"loss": 1.4535,
"step": 7500
},
{
"epoch": 0.25,
"learning_rate": 4.57592527803399e-05,
"loss": 1.474,
"step": 8000
},
{
"epoch": 0.27,
"learning_rate": 4.549420607911114e-05,
"loss": 1.4491,
"step": 8500
},
{
"epoch": 0.29,
"learning_rate": 4.522915937788238e-05,
"loss": 1.4336,
"step": 9000
},
{
"epoch": 0.3,
"learning_rate": 4.496411267665363e-05,
"loss": 1.4323,
"step": 9500
},
{
"epoch": 0.32,
"learning_rate": 4.469906597542487e-05,
"loss": 1.4189,
"step": 10000
},
{
"epoch": 0.33,
"learning_rate": 4.443401927419611e-05,
"loss": 1.4101,
"step": 10500
},
{
"epoch": 0.35,
"learning_rate": 4.416897257296736e-05,
"loss": 1.3992,
"step": 11000
},
{
"epoch": 0.37,
"learning_rate": 4.390392587173861e-05,
"loss": 1.3975,
"step": 11500
},
{
"epoch": 0.38,
"learning_rate": 4.363887917050984e-05,
"loss": 1.3845,
"step": 12000
},
{
"epoch": 0.4,
"learning_rate": 4.337383246928109e-05,
"loss": 1.3795,
"step": 12500
},
{
"epoch": 0.41,
"learning_rate": 4.310878576805233e-05,
"loss": 1.3867,
"step": 13000
},
{
"epoch": 0.43,
"learning_rate": 4.284373906682357e-05,
"loss": 1.3647,
"step": 13500
},
{
"epoch": 0.45,
"learning_rate": 4.257869236559482e-05,
"loss": 1.3638,
"step": 14000
},
{
"epoch": 0.46,
"learning_rate": 4.231364566436606e-05,
"loss": 1.3507,
"step": 14500
},
{
"epoch": 0.48,
"learning_rate": 4.204859896313731e-05,
"loss": 1.3462,
"step": 15000
},
{
"epoch": 0.49,
"learning_rate": 4.178355226190855e-05,
"loss": 1.3511,
"step": 15500
},
{
"epoch": 0.51,
"learning_rate": 4.151850556067979e-05,
"loss": 1.3384,
"step": 16000
},
{
"epoch": 0.52,
"learning_rate": 4.125345885945104e-05,
"loss": 1.319,
"step": 16500
},
{
"epoch": 0.54,
"learning_rate": 4.098841215822228e-05,
"loss": 1.3331,
"step": 17000
},
{
"epoch": 0.56,
"learning_rate": 4.072336545699352e-05,
"loss": 1.3119,
"step": 17500
},
{
"epoch": 0.57,
"learning_rate": 4.045831875576477e-05,
"loss": 1.32,
"step": 18000
},
{
"epoch": 0.59,
"learning_rate": 4.019327205453601e-05,
"loss": 1.3224,
"step": 18500
},
{
"epoch": 0.6,
"learning_rate": 3.992822535330725e-05,
"loss": 1.2868,
"step": 19000
},
{
"epoch": 0.62,
"learning_rate": 3.96631786520785e-05,
"loss": 1.2955,
"step": 19500
},
{
"epoch": 0.64,
"learning_rate": 3.939813195084974e-05,
"loss": 1.2932,
"step": 20000
},
{
"epoch": 0.65,
"learning_rate": 3.913308524962099e-05,
"loss": 1.277,
"step": 20500
},
{
"epoch": 0.67,
"learning_rate": 3.886803854839223e-05,
"loss": 1.2759,
"step": 21000
},
{
"epoch": 0.68,
"learning_rate": 3.860299184716347e-05,
"loss": 1.2793,
"step": 21500
},
{
"epoch": 0.7,
"learning_rate": 3.833794514593472e-05,
"loss": 1.2614,
"step": 22000
},
{
"epoch": 0.72,
"learning_rate": 3.807289844470596e-05,
"loss": 1.2768,
"step": 22500
},
{
"epoch": 0.73,
"learning_rate": 3.78078517434772e-05,
"loss": 1.2615,
"step": 23000
},
{
"epoch": 0.75,
"learning_rate": 3.754280504224845e-05,
"loss": 1.2525,
"step": 23500
},
{
"epoch": 0.76,
"learning_rate": 3.727775834101969e-05,
"loss": 1.2593,
"step": 24000
},
{
"epoch": 0.78,
"learning_rate": 3.701271163979093e-05,
"loss": 1.2435,
"step": 24500
},
{
"epoch": 0.8,
"learning_rate": 3.674766493856218e-05,
"loss": 1.241,
"step": 25000
},
{
"epoch": 0.81,
"learning_rate": 3.648261823733342e-05,
"loss": 1.2449,
"step": 25500
},
{
"epoch": 0.83,
"learning_rate": 3.621757153610466e-05,
"loss": 1.2229,
"step": 26000
},
{
"epoch": 0.84,
"learning_rate": 3.595252483487591e-05,
"loss": 1.2296,
"step": 26500
},
{
"epoch": 0.86,
"learning_rate": 3.568747813364715e-05,
"loss": 1.2257,
"step": 27000
},
{
"epoch": 0.87,
"learning_rate": 3.5422431432418397e-05,
"loss": 1.2292,
"step": 27500
},
{
"epoch": 0.89,
"learning_rate": 3.515738473118964e-05,
"loss": 1.2351,
"step": 28000
},
{
"epoch": 0.91,
"learning_rate": 3.489233802996088e-05,
"loss": 1.2179,
"step": 28500
},
{
"epoch": 0.92,
"learning_rate": 3.4627291328732127e-05,
"loss": 1.2179,
"step": 29000
},
{
"epoch": 0.94,
"learning_rate": 3.436224462750337e-05,
"loss": 1.2057,
"step": 29500
},
{
"epoch": 0.95,
"learning_rate": 3.409719792627461e-05,
"loss": 1.2081,
"step": 30000
},
{
"epoch": 0.97,
"learning_rate": 3.3832151225045856e-05,
"loss": 1.2069,
"step": 30500
},
{
"epoch": 0.99,
"learning_rate": 3.35671045238171e-05,
"loss": 1.1903,
"step": 31000
},
{
"epoch": 1.0,
"learning_rate": 3.330205782258834e-05,
"loss": 1.1958,
"step": 31500
},
{
"epoch": 1.02,
"learning_rate": 3.3037011121359586e-05,
"loss": 1.1746,
"step": 32000
},
{
"epoch": 1.03,
"learning_rate": 3.277196442013083e-05,
"loss": 1.1648,
"step": 32500
},
{
"epoch": 1.05,
"learning_rate": 3.2506917718902075e-05,
"loss": 1.1615,
"step": 33000
},
{
"epoch": 1.07,
"learning_rate": 3.2241871017673316e-05,
"loss": 1.1732,
"step": 33500
},
{
"epoch": 1.08,
"learning_rate": 3.197682431644456e-05,
"loss": 1.1664,
"step": 34000
},
{
"epoch": 1.1,
"learning_rate": 3.1711777615215805e-05,
"loss": 1.1639,
"step": 34500
},
{
"epoch": 1.11,
"learning_rate": 3.1446730913987046e-05,
"loss": 1.1507,
"step": 35000
},
{
"epoch": 1.13,
"learning_rate": 3.118168421275829e-05,
"loss": 1.1577,
"step": 35500
},
{
"epoch": 1.15,
"learning_rate": 3.0916637511529535e-05,
"loss": 1.1563,
"step": 36000
},
{
"epoch": 1.16,
"learning_rate": 3.0651590810300776e-05,
"loss": 1.1528,
"step": 36500
},
{
"epoch": 1.18,
"learning_rate": 3.0386544109072017e-05,
"loss": 1.1461,
"step": 37000
},
{
"epoch": 1.19,
"learning_rate": 3.0121497407843262e-05,
"loss": 1.1574,
"step": 37500
},
{
"epoch": 1.21,
"learning_rate": 2.985645070661451e-05,
"loss": 1.1503,
"step": 38000
},
{
"epoch": 1.22,
"learning_rate": 2.9591404005385747e-05,
"loss": 1.1522,
"step": 38500
},
{
"epoch": 1.24,
"learning_rate": 2.9326357304156992e-05,
"loss": 1.1451,
"step": 39000
},
{
"epoch": 1.26,
"learning_rate": 2.9061310602928236e-05,
"loss": 1.1335,
"step": 39500
},
{
"epoch": 1.27,
"learning_rate": 2.8796263901699484e-05,
"loss": 1.1383,
"step": 40000
},
{
"epoch": 1.29,
"learning_rate": 2.8531217200470722e-05,
"loss": 1.1363,
"step": 40500
},
{
"epoch": 1.3,
"learning_rate": 2.8266170499241966e-05,
"loss": 1.1277,
"step": 41000
},
{
"epoch": 1.32,
"learning_rate": 2.800112379801321e-05,
"loss": 1.1332,
"step": 41500
},
{
"epoch": 1.34,
"learning_rate": 2.773607709678446e-05,
"loss": 1.1249,
"step": 42000
},
{
"epoch": 1.35,
"learning_rate": 2.7471030395555696e-05,
"loss": 1.1283,
"step": 42500
},
{
"epoch": 1.37,
"learning_rate": 2.720598369432694e-05,
"loss": 1.1177,
"step": 43000
},
{
"epoch": 1.38,
"learning_rate": 2.694093699309819e-05,
"loss": 1.1023,
"step": 43500
},
{
"epoch": 1.4,
"learning_rate": 2.6675890291869426e-05,
"loss": 1.1245,
"step": 44000
},
{
"epoch": 1.42,
"learning_rate": 2.641084359064067e-05,
"loss": 1.1194,
"step": 44500
},
{
"epoch": 1.43,
"learning_rate": 2.6145796889411915e-05,
"loss": 1.1132,
"step": 45000
},
{
"epoch": 1.45,
"learning_rate": 2.5880750188183163e-05,
"loss": 1.1073,
"step": 45500
},
{
"epoch": 1.46,
"learning_rate": 2.56157034869544e-05,
"loss": 1.0974,
"step": 46000
},
{
"epoch": 1.48,
"learning_rate": 2.5350656785725645e-05,
"loss": 1.1003,
"step": 46500
},
{
"epoch": 1.49,
"learning_rate": 2.508561008449689e-05,
"loss": 1.0966,
"step": 47000
},
{
"epoch": 1.51,
"learning_rate": 2.4820563383268134e-05,
"loss": 1.0928,
"step": 47500
},
{
"epoch": 1.53,
"learning_rate": 2.4555516682039375e-05,
"loss": 1.0868,
"step": 48000
},
{
"epoch": 1.54,
"learning_rate": 2.429046998081062e-05,
"loss": 1.0902,
"step": 48500
},
{
"epoch": 1.56,
"learning_rate": 2.4025423279581864e-05,
"loss": 1.0978,
"step": 49000
},
{
"epoch": 1.57,
"learning_rate": 2.376037657835311e-05,
"loss": 1.0828,
"step": 49500
},
{
"epoch": 1.59,
"learning_rate": 2.349532987712435e-05,
"loss": 1.1016,
"step": 50000
},
{
"epoch": 1.61,
"learning_rate": 2.3230283175895594e-05,
"loss": 1.0892,
"step": 50500
},
{
"epoch": 1.62,
"learning_rate": 2.296523647466684e-05,
"loss": 1.0832,
"step": 51000
},
{
"epoch": 1.64,
"learning_rate": 2.270018977343808e-05,
"loss": 1.0668,
"step": 51500
},
{
"epoch": 1.65,
"learning_rate": 2.2435143072209324e-05,
"loss": 1.0756,
"step": 52000
},
{
"epoch": 1.67,
"learning_rate": 2.217009637098057e-05,
"loss": 1.0714,
"step": 52500
},
{
"epoch": 1.69,
"learning_rate": 2.1905049669751813e-05,
"loss": 1.0592,
"step": 53000
},
{
"epoch": 1.7,
"learning_rate": 2.1640002968523054e-05,
"loss": 1.07,
"step": 53500
},
{
"epoch": 1.72,
"learning_rate": 2.13749562672943e-05,
"loss": 1.0548,
"step": 54000
},
{
"epoch": 1.73,
"learning_rate": 2.1109909566065543e-05,
"loss": 1.0589,
"step": 54500
},
{
"epoch": 1.75,
"learning_rate": 2.0844862864836784e-05,
"loss": 1.0662,
"step": 55000
},
{
"epoch": 1.77,
"learning_rate": 2.057981616360803e-05,
"loss": 1.0678,
"step": 55500
},
{
"epoch": 1.78,
"learning_rate": 2.0314769462379273e-05,
"loss": 1.0545,
"step": 56000
},
{
"epoch": 1.8,
"learning_rate": 2.0049722761150517e-05,
"loss": 1.0486,
"step": 56500
},
{
"epoch": 1.81,
"learning_rate": 1.978467605992176e-05,
"loss": 1.051,
"step": 57000
},
{
"epoch": 1.83,
"learning_rate": 1.9519629358693003e-05,
"loss": 1.0389,
"step": 57500
},
{
"epoch": 1.84,
"learning_rate": 1.9254582657464247e-05,
"loss": 1.0445,
"step": 58000
},
{
"epoch": 1.86,
"learning_rate": 1.8989535956235492e-05,
"loss": 1.0511,
"step": 58500
},
{
"epoch": 1.88,
"learning_rate": 1.8724489255006733e-05,
"loss": 1.0384,
"step": 59000
},
{
"epoch": 1.89,
"learning_rate": 1.8459442553777974e-05,
"loss": 1.0386,
"step": 59500
},
{
"epoch": 1.91,
"learning_rate": 1.819439585254922e-05,
"loss": 1.0321,
"step": 60000
},
{
"epoch": 1.92,
"learning_rate": 1.7929349151320463e-05,
"loss": 1.0424,
"step": 60500
},
{
"epoch": 1.94,
"learning_rate": 1.7664302450091707e-05,
"loss": 1.0421,
"step": 61000
},
{
"epoch": 1.96,
"learning_rate": 1.7399255748862948e-05,
"loss": 1.0334,
"step": 61500
},
{
"epoch": 1.97,
"learning_rate": 1.7134209047634196e-05,
"loss": 1.0385,
"step": 62000
},
{
"epoch": 1.99,
"learning_rate": 1.6869162346405437e-05,
"loss": 1.0265,
"step": 62500
},
{
"epoch": 2.0,
"learning_rate": 1.660411564517668e-05,
"loss": 1.0245,
"step": 63000
},
{
"epoch": 2.02,
"learning_rate": 1.6339068943947926e-05,
"loss": 1.0146,
"step": 63500
},
{
"epoch": 2.04,
"learning_rate": 1.6074022242719167e-05,
"loss": 1.0161,
"step": 64000
},
{
"epoch": 2.05,
"learning_rate": 1.580897554149041e-05,
"loss": 1.0196,
"step": 64500
},
{
"epoch": 2.07,
"learning_rate": 1.5543928840261653e-05,
"loss": 1.0118,
"step": 65000
},
{
"epoch": 2.08,
"learning_rate": 1.52788821390329e-05,
"loss": 0.9996,
"step": 65500
},
{
"epoch": 2.1,
"learning_rate": 1.5013835437804142e-05,
"loss": 1.0026,
"step": 66000
},
{
"epoch": 2.12,
"learning_rate": 1.4748788736575386e-05,
"loss": 1.0156,
"step": 66500
},
{
"epoch": 2.13,
"learning_rate": 1.4483742035346629e-05,
"loss": 1.0017,
"step": 67000
},
{
"epoch": 2.15,
"learning_rate": 1.4218695334117873e-05,
"loss": 1.0129,
"step": 67500
},
{
"epoch": 2.16,
"learning_rate": 1.3953648632889116e-05,
"loss": 1.0093,
"step": 68000
},
{
"epoch": 2.18,
"learning_rate": 1.3688601931660359e-05,
"loss": 0.9939,
"step": 68500
},
{
"epoch": 2.19,
"learning_rate": 1.3423555230431603e-05,
"loss": 0.9891,
"step": 69000
},
{
"epoch": 2.21,
"learning_rate": 1.3158508529202846e-05,
"loss": 0.9978,
"step": 69500
},
{
"epoch": 2.23,
"learning_rate": 1.289346182797409e-05,
"loss": 1.0043,
"step": 70000
},
{
"epoch": 2.24,
"learning_rate": 1.2628415126745333e-05,
"loss": 0.9913,
"step": 70500
},
{
"epoch": 2.26,
"learning_rate": 1.2363368425516576e-05,
"loss": 0.9986,
"step": 71000
},
{
"epoch": 2.27,
"learning_rate": 1.209832172428782e-05,
"loss": 0.9892,
"step": 71500
},
{
"epoch": 2.29,
"learning_rate": 1.1833275023059063e-05,
"loss": 0.9995,
"step": 72000
},
{
"epoch": 2.31,
"learning_rate": 1.1568228321830308e-05,
"loss": 0.9944,
"step": 72500
},
{
"epoch": 2.32,
"learning_rate": 1.130318162060155e-05,
"loss": 0.998,
"step": 73000
},
{
"epoch": 2.34,
"learning_rate": 1.1038134919372795e-05,
"loss": 0.9773,
"step": 73500
},
{
"epoch": 2.35,
"learning_rate": 1.0773088218144038e-05,
"loss": 0.9928,
"step": 74000
},
{
"epoch": 2.37,
"learning_rate": 1.050804151691528e-05,
"loss": 0.9787,
"step": 74500
},
{
"epoch": 2.39,
"learning_rate": 1.0242994815686525e-05,
"loss": 0.978,
"step": 75000
},
{
"epoch": 2.4,
"learning_rate": 9.977948114457768e-06,
"loss": 0.9757,
"step": 75500
},
{
"epoch": 2.42,
"learning_rate": 9.712901413229012e-06,
"loss": 0.9773,
"step": 76000
},
{
"epoch": 2.43,
"learning_rate": 9.447854712000255e-06,
"loss": 0.9747,
"step": 76500
},
{
"epoch": 2.45,
"learning_rate": 9.1828080107715e-06,
"loss": 0.9703,
"step": 77000
},
{
"epoch": 2.46,
"learning_rate": 8.917761309542742e-06,
"loss": 0.9631,
"step": 77500
},
{
"epoch": 2.48,
"learning_rate": 8.652714608313986e-06,
"loss": 0.9754,
"step": 78000
},
{
"epoch": 2.5,
"learning_rate": 8.387667907085228e-06,
"loss": 0.9834,
"step": 78500
},
{
"epoch": 2.51,
"learning_rate": 8.122621205856472e-06,
"loss": 0.9797,
"step": 79000
},
{
"epoch": 2.53,
"learning_rate": 7.857574504627715e-06,
"loss": 0.9761,
"step": 79500
},
{
"epoch": 2.54,
"learning_rate": 7.592527803398959e-06,
"loss": 0.9689,
"step": 80000
},
{
"epoch": 2.56,
"learning_rate": 7.327481102170203e-06,
"loss": 0.9683,
"step": 80500
},
{
"epoch": 2.58,
"learning_rate": 7.062434400941446e-06,
"loss": 0.964,
"step": 81000
},
{
"epoch": 2.59,
"learning_rate": 6.79738769971269e-06,
"loss": 0.968,
"step": 81500
},
{
"epoch": 2.61,
"learning_rate": 6.532340998483934e-06,
"loss": 0.9638,
"step": 82000
},
{
"epoch": 2.62,
"learning_rate": 6.267294297255176e-06,
"loss": 0.9651,
"step": 82500
},
{
"epoch": 2.64,
"learning_rate": 6.00224759602642e-06,
"loss": 0.9616,
"step": 83000
},
{
"epoch": 2.66,
"learning_rate": 5.737200894797664e-06,
"loss": 0.9586,
"step": 83500
},
{
"epoch": 2.67,
"learning_rate": 5.472154193568907e-06,
"loss": 0.965,
"step": 84000
},
{
"epoch": 2.69,
"learning_rate": 5.207107492340151e-06,
"loss": 0.9616,
"step": 84500
},
{
"epoch": 2.7,
"learning_rate": 4.942060791111394e-06,
"loss": 0.961,
"step": 85000
},
{
"epoch": 2.72,
"learning_rate": 4.677014089882638e-06,
"loss": 0.9471,
"step": 85500
},
{
"epoch": 2.74,
"learning_rate": 4.411967388653881e-06,
"loss": 0.9676,
"step": 86000
},
{
"epoch": 2.75,
"learning_rate": 4.146920687425124e-06,
"loss": 0.9451,
"step": 86500
},
{
"epoch": 2.77,
"learning_rate": 3.881873986196368e-06,
"loss": 0.9501,
"step": 87000
},
{
"epoch": 2.78,
"learning_rate": 3.6168272849676116e-06,
"loss": 0.949,
"step": 87500
},
{
"epoch": 2.8,
"learning_rate": 3.3517805837388548e-06,
"loss": 0.9511,
"step": 88000
},
{
"epoch": 2.81,
"learning_rate": 3.0867338825100984e-06,
"loss": 0.9452,
"step": 88500
},
{
"epoch": 2.83,
"learning_rate": 2.821687181281342e-06,
"loss": 0.9458,
"step": 89000
},
{
"epoch": 2.85,
"learning_rate": 2.556640480052585e-06,
"loss": 0.9519,
"step": 89500
},
{
"epoch": 2.86,
"learning_rate": 2.2915937788238288e-06,
"loss": 0.9391,
"step": 90000
},
{
"epoch": 2.88,
"learning_rate": 2.0265470775950724e-06,
"loss": 0.9483,
"step": 90500
},
{
"epoch": 2.89,
"learning_rate": 1.7615003763663158e-06,
"loss": 0.9483,
"step": 91000
},
{
"epoch": 2.91,
"learning_rate": 1.4964536751375594e-06,
"loss": 0.9454,
"step": 91500
},
{
"epoch": 2.93,
"learning_rate": 1.2314069739088028e-06,
"loss": 0.9371,
"step": 92000
},
{
"epoch": 2.94,
"learning_rate": 9.663602726800462e-07,
"loss": 0.9447,
"step": 92500
},
{
"epoch": 2.96,
"learning_rate": 7.013135714512897e-07,
"loss": 0.9461,
"step": 93000
},
{
"epoch": 2.97,
"learning_rate": 4.362668702225332e-07,
"loss": 0.9397,
"step": 93500
},
{
"epoch": 2.99,
"learning_rate": 1.7122016899377671e-07,
"loss": 0.9397,
"step": 94000
},
{
"epoch": 3.0,
"step": 94323,
"total_flos": 8.797388561511875e+17,
"train_loss": 1.148913297653906,
"train_runtime": 104744.2409,
"train_samples_per_second": 9.005,
"train_steps_per_second": 0.901
}
],
"max_steps": 94323,
"num_train_epochs": 3,
"total_flos": 8.797388561511875e+17,
"trial_name": null,
"trial_params": null
}