{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 25.0,
"global_step": 95050,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5e-09,
"loss": 10.5513,
"step": 1
},
{
"epoch": 0.13,
"learning_rate": 2.5e-06,
"loss": 9.5944,
"step": 500
},
{
"epoch": 0.26,
"learning_rate": 5e-06,
"loss": 8.0091,
"step": 1000
},
{
"epoch": 0.39,
"learning_rate": 7.5e-06,
"loss": 6.96,
"step": 1500
},
{
"epoch": 0.53,
"learning_rate": 1e-05,
"loss": 6.6546,
"step": 2000
},
{
"epoch": 0.66,
"learning_rate": 1.25e-05,
"loss": 6.4887,
"step": 2500
},
{
"epoch": 0.79,
"learning_rate": 1.5e-05,
"loss": 6.3737,
"step": 3000
},
{
"epoch": 0.92,
"learning_rate": 1.75e-05,
"loss": 6.2809,
"step": 3500
},
{
"epoch": 1.05,
"learning_rate": 2e-05,
"loss": 6.211,
"step": 4000
},
{
"epoch": 1.18,
"learning_rate": 2.25e-05,
"loss": 6.1475,
"step": 4500
},
{
"epoch": 1.32,
"learning_rate": 2.5e-05,
"loss": 6.0923,
"step": 5000
},
{
"epoch": 1.45,
"learning_rate": 2.7500000000000004e-05,
"loss": 6.0436,
"step": 5500
},
{
"epoch": 1.58,
"learning_rate": 3e-05,
"loss": 6.0011,
"step": 6000
},
{
"epoch": 1.71,
"learning_rate": 3.2495000000000007e-05,
"loss": 5.9694,
"step": 6500
},
{
"epoch": 1.84,
"learning_rate": 3.4995e-05,
"loss": 5.9366,
"step": 7000
},
{
"epoch": 1.97,
"learning_rate": 3.7495e-05,
"loss": 5.9044,
"step": 7500
},
{
"epoch": 2.1,
"learning_rate": 3.9995000000000006e-05,
"loss": 5.8788,
"step": 8000
},
{
"epoch": 2.24,
"learning_rate": 4.2495e-05,
"loss": 5.8576,
"step": 8500
},
{
"epoch": 2.37,
"learning_rate": 4.4995000000000005e-05,
"loss": 5.8413,
"step": 9000
},
{
"epoch": 2.5,
"learning_rate": 4.7495e-05,
"loss": 5.818,
"step": 9500
},
{
"epoch": 2.63,
"learning_rate": 4.9995000000000005e-05,
"loss": 5.8055,
"step": 10000
},
{
"epoch": 2.76,
"learning_rate": 4.998348557055865e-05,
"loss": 5.7852,
"step": 10500
},
{
"epoch": 2.89,
"learning_rate": 4.9966971141117293e-05,
"loss": 5.768,
"step": 11000
},
{
"epoch": 3.02,
"learning_rate": 4.995042361662696e-05,
"loss": 5.7555,
"step": 11500
},
{
"epoch": 3.16,
"learning_rate": 4.993387609213662e-05,
"loss": 5.7434,
"step": 12000
},
{
"epoch": 3.29,
"learning_rate": 4.991732856764628e-05,
"loss": 5.7306,
"step": 12500
},
{
"epoch": 3.42,
"learning_rate": 4.9900814138204925e-05,
"loss": 5.7245,
"step": 13000
},
{
"epoch": 3.55,
"learning_rate": 4.988426661371459e-05,
"loss": 5.7136,
"step": 13500
},
{
"epoch": 3.68,
"learning_rate": 4.986771908922425e-05,
"loss": 5.7022,
"step": 14000
},
{
"epoch": 3.81,
"learning_rate": 4.985117156473392e-05,
"loss": 5.6951,
"step": 14500
},
{
"epoch": 3.95,
"learning_rate": 4.983465713529256e-05,
"loss": 5.6872,
"step": 15000
},
{
"epoch": 4.08,
"learning_rate": 4.981810961080223e-05,
"loss": 5.6745,
"step": 15500
},
{
"epoch": 4.21,
"learning_rate": 4.9801562086311895e-05,
"loss": 5.6695,
"step": 16000
},
{
"epoch": 4.34,
"learning_rate": 4.9785014561821555e-05,
"loss": 5.6663,
"step": 16500
},
{
"epoch": 4.47,
"learning_rate": 4.9768500132380194e-05,
"loss": 5.6611,
"step": 17000
},
{
"epoch": 4.6,
"learning_rate": 4.975195260788986e-05,
"loss": 5.6569,
"step": 17500
},
{
"epoch": 4.73,
"learning_rate": 4.9735405083399527e-05,
"loss": 5.6487,
"step": 18000
},
{
"epoch": 4.87,
"learning_rate": 4.9718857558909186e-05,
"loss": 5.6432,
"step": 18500
},
{
"epoch": 5.0,
"learning_rate": 4.970231003441885e-05,
"loss": 5.6399,
"step": 19000
},
{
"epoch": 5.13,
"learning_rate": 4.96857956049775e-05,
"loss": 5.6297,
"step": 19500
},
{
"epoch": 5.26,
"learning_rate": 4.9669248080487165e-05,
"loss": 5.625,
"step": 20000
},
{
"epoch": 5.39,
"learning_rate": 4.9652700555996824e-05,
"loss": 5.6231,
"step": 20500
},
{
"epoch": 5.52,
"learning_rate": 4.963615303150649e-05,
"loss": 5.6183,
"step": 21000
},
{
"epoch": 5.65,
"learning_rate": 4.961963860206513e-05,
"loss": 5.6134,
"step": 21500
},
{
"epoch": 5.79,
"learning_rate": 4.9603091077574796e-05,
"loss": 5.6107,
"step": 22000
},
{
"epoch": 5.92,
"learning_rate": 4.958654355308446e-05,
"loss": 5.6064,
"step": 22500
},
{
"epoch": 6.05,
"learning_rate": 4.956999602859412e-05,
"loss": 5.6008,
"step": 23000
},
{
"epoch": 6.18,
"learning_rate": 4.955348159915277e-05,
"loss": 5.5932,
"step": 23500
},
{
"epoch": 6.31,
"learning_rate": 4.9536934074662434e-05,
"loss": 5.5929,
"step": 24000
},
{
"epoch": 6.44,
"learning_rate": 4.95203865501721e-05,
"loss": 5.5891,
"step": 24500
},
{
"epoch": 6.58,
"learning_rate": 4.9503872120730746e-05,
"loss": 5.5828,
"step": 25000
},
{
"epoch": 6.71,
"learning_rate": 4.9487324596240405e-05,
"loss": 5.5846,
"step": 25500
},
{
"epoch": 6.84,
"learning_rate": 4.947077707175007e-05,
"loss": 5.581,
"step": 26000
},
{
"epoch": 6.97,
"learning_rate": 4.945422954725973e-05,
"loss": 5.5753,
"step": 26500
},
{
"epoch": 7.1,
"learning_rate": 4.943768202276939e-05,
"loss": 5.5719,
"step": 27000
},
{
"epoch": 7.23,
"learning_rate": 4.942113449827906e-05,
"loss": 5.5668,
"step": 27500
},
{
"epoch": 7.36,
"learning_rate": 4.9404586973788724e-05,
"loss": 5.564,
"step": 28000
},
{
"epoch": 7.5,
"learning_rate": 4.938807254434737e-05,
"loss": 5.5632,
"step": 28500
},
{
"epoch": 7.63,
"learning_rate": 4.9371525019857036e-05,
"loss": 5.3943,
"step": 29000
},
{
"epoch": 7.76,
"learning_rate": 4.9354977495366695e-05,
"loss": 5.1613,
"step": 29500
},
{
"epoch": 7.89,
"learning_rate": 4.933842997087636e-05,
"loss": 4.9594,
"step": 30000
},
{
"epoch": 8.02,
"learning_rate": 4.932188244638603e-05,
"loss": 4.7739,
"step": 30500
},
{
"epoch": 8.15,
"learning_rate": 4.930533492189569e-05,
"loss": 4.6014,
"step": 31000
},
{
"epoch": 8.29,
"learning_rate": 4.928878739740535e-05,
"loss": 4.4413,
"step": 31500
},
{
"epoch": 8.42,
"learning_rate": 4.9272239872915014e-05,
"loss": 4.2832,
"step": 32000
},
{
"epoch": 8.55,
"learning_rate": 4.925572544347366e-05,
"loss": 4.1308,
"step": 32500
},
{
"epoch": 8.68,
"learning_rate": 4.923917791898332e-05,
"loss": 3.9864,
"step": 33000
},
{
"epoch": 8.81,
"learning_rate": 4.9222630394492985e-05,
"loss": 3.8488,
"step": 33500
},
{
"epoch": 8.94,
"learning_rate": 4.920608287000265e-05,
"loss": 3.699,
"step": 34000
},
{
"epoch": 9.07,
"learning_rate": 4.918953534551232e-05,
"loss": 3.5521,
"step": 34500
},
{
"epoch": 9.21,
"learning_rate": 4.917302091607096e-05,
"loss": 3.3783,
"step": 35000
},
{
"epoch": 9.34,
"learning_rate": 4.915647339158062e-05,
"loss": 2.8694,
"step": 35500
},
{
"epoch": 9.47,
"learning_rate": 4.913992586709028e-05,
"loss": 2.3898,
"step": 36000
},
{
"epoch": 9.6,
"learning_rate": 4.912337834259995e-05,
"loss": 2.1786,
"step": 36500
},
{
"epoch": 9.73,
"learning_rate": 4.9106863913158595e-05,
"loss": 2.0476,
"step": 37000
},
{
"epoch": 9.86,
"learning_rate": 4.9090316388668254e-05,
"loss": 1.9528,
"step": 37500
},
{
"epoch": 9.99,
"learning_rate": 4.90738019592269e-05,
"loss": 1.8765,
"step": 38000
},
{
"epoch": 10.13,
"learning_rate": 4.9057254434736566e-05,
"loss": 1.8172,
"step": 38500
},
{
"epoch": 10.26,
"learning_rate": 4.904070691024623e-05,
"loss": 1.7658,
"step": 39000
},
{
"epoch": 10.39,
"learning_rate": 4.902415938575589e-05,
"loss": 1.7213,
"step": 39500
},
{
"epoch": 10.52,
"learning_rate": 4.900764495631454e-05,
"loss": 1.6845,
"step": 40000
},
{
"epoch": 10.65,
"learning_rate": 4.8991097431824204e-05,
"loss": 1.6475,
"step": 40500
},
{
"epoch": 10.78,
"learning_rate": 4.8974549907333864e-05,
"loss": 1.6163,
"step": 41000
},
{
"epoch": 10.92,
"learning_rate": 4.8958002382843524e-05,
"loss": 1.5924,
"step": 41500
},
{
"epoch": 11.05,
"learning_rate": 4.894148795340217e-05,
"loss": 1.5644,
"step": 42000
},
{
"epoch": 11.18,
"learning_rate": 4.8924940428911836e-05,
"loss": 1.5381,
"step": 42500
},
{
"epoch": 11.31,
"learning_rate": 4.89083929044215e-05,
"loss": 1.5176,
"step": 43000
},
{
"epoch": 11.44,
"learning_rate": 4.889184537993117e-05,
"loss": 1.4933,
"step": 43500
},
{
"epoch": 11.57,
"learning_rate": 4.887529785544083e-05,
"loss": 1.4755,
"step": 44000
},
{
"epoch": 11.7,
"learning_rate": 4.8858783425999473e-05,
"loss": 1.4564,
"step": 44500
},
{
"epoch": 11.84,
"learning_rate": 4.884223590150914e-05,
"loss": 1.4382,
"step": 45000
},
{
"epoch": 11.97,
"learning_rate": 4.88256883770188e-05,
"loss": 1.4251,
"step": 45500
},
{
"epoch": 12.1,
"learning_rate": 4.880914085252846e-05,
"loss": 1.4069,
"step": 46000
},
{
"epoch": 12.23,
"learning_rate": 4.8792593328038126e-05,
"loss": 1.3901,
"step": 46500
},
{
"epoch": 12.36,
"learning_rate": 4.877604580354779e-05,
"loss": 1.3754,
"step": 47000
},
{
"epoch": 12.49,
"learning_rate": 4.875949827905745e-05,
"loss": 1.3633,
"step": 47500
},
{
"epoch": 12.62,
"learning_rate": 4.874295075456712e-05,
"loss": 1.3459,
"step": 48000
},
{
"epoch": 12.76,
"learning_rate": 4.8726436325125763e-05,
"loss": 1.3374,
"step": 48500
},
{
"epoch": 12.89,
"learning_rate": 4.870992189568441e-05,
"loss": 1.3237,
"step": 49000
},
{
"epoch": 13.02,
"learning_rate": 4.8693374371194075e-05,
"loss": 1.3117,
"step": 49500
},
{
"epoch": 13.15,
"learning_rate": 4.8676826846703735e-05,
"loss": 1.3009,
"step": 50000
},
{
"epoch": 13.28,
"learning_rate": 4.8660279322213395e-05,
"loss": 1.2906,
"step": 50500
},
{
"epoch": 13.41,
"learning_rate": 4.864373179772306e-05,
"loss": 1.2816,
"step": 51000
},
{
"epoch": 13.55,
"learning_rate": 4.8627217368281707e-05,
"loss": 1.2717,
"step": 51500
},
{
"epoch": 13.68,
"learning_rate": 4.861066984379137e-05,
"loss": 1.2648,
"step": 52000
},
{
"epoch": 13.81,
"learning_rate": 4.859412231930103e-05,
"loss": 1.2561,
"step": 52500
},
{
"epoch": 13.94,
"learning_rate": 4.85775747948107e-05,
"loss": 1.2473,
"step": 53000
},
{
"epoch": 14.07,
"learning_rate": 4.8561027270320365e-05,
"loss": 1.2351,
"step": 53500
},
{
"epoch": 14.2,
"learning_rate": 4.854451284087901e-05,
"loss": 1.2292,
"step": 54000
},
{
"epoch": 14.33,
"learning_rate": 4.852796531638867e-05,
"loss": 1.2221,
"step": 54500
},
{
"epoch": 14.47,
"learning_rate": 4.851141779189834e-05,
"loss": 1.2137,
"step": 55000
},
{
"epoch": 14.6,
"learning_rate": 4.8494870267407997e-05,
"loss": 1.2065,
"step": 55500
},
{
"epoch": 14.73,
"learning_rate": 4.847832274291766e-05,
"loss": 1.1981,
"step": 56000
},
{
"epoch": 14.86,
"learning_rate": 4.846180831347631e-05,
"loss": 1.1931,
"step": 56500
},
{
"epoch": 14.99,
"learning_rate": 4.844526078898597e-05,
"loss": 1.1876,
"step": 57000
},
{
"epoch": 15.12,
"learning_rate": 4.8428713264495635e-05,
"loss": 1.1777,
"step": 57500
},
{
"epoch": 15.26,
"learning_rate": 4.84121657400053e-05,
"loss": 1.1734,
"step": 58000
},
{
"epoch": 15.39,
"learning_rate": 4.8395651310563947e-05,
"loss": 1.1639,
"step": 58500
},
{
"epoch": 15.52,
"learning_rate": 4.8379103786073606e-05,
"loss": 1.1619,
"step": 59000
},
{
"epoch": 15.65,
"learning_rate": 4.836255626158327e-05,
"loss": 1.1534,
"step": 59500
},
{
"epoch": 15.78,
"learning_rate": 4.834600873709293e-05,
"loss": 1.1484,
"step": 60000
},
{
"epoch": 15.91,
"learning_rate": 4.832946121260259e-05,
"loss": 1.1453,
"step": 60500
},
{
"epoch": 16.04,
"learning_rate": 4.831291368811226e-05,
"loss": 1.1395,
"step": 61000
},
{
"epoch": 16.18,
"learning_rate": 4.8296366163621925e-05,
"loss": 1.1322,
"step": 61500
},
{
"epoch": 16.31,
"learning_rate": 4.827985173418057e-05,
"loss": 1.1269,
"step": 62000
},
{
"epoch": 16.44,
"learning_rate": 4.826330420969023e-05,
"loss": 1.1231,
"step": 62500
},
{
"epoch": 16.57,
"learning_rate": 4.8246756685199896e-05,
"loss": 1.1167,
"step": 63000
},
{
"epoch": 16.7,
"learning_rate": 4.823020916070956e-05,
"loss": 1.1136,
"step": 63500
},
{
"epoch": 16.83,
"learning_rate": 4.821366163621923e-05,
"loss": 1.107,
"step": 64000
},
{
"epoch": 16.96,
"learning_rate": 4.819714720677787e-05,
"loss": 1.1027,
"step": 64500
},
{
"epoch": 17.1,
"learning_rate": 4.818059968228753e-05,
"loss": 1.0964,
"step": 65000
},
{
"epoch": 17.23,
"learning_rate": 4.8164052157797194e-05,
"loss": 1.0933,
"step": 65500
},
{
"epoch": 17.36,
"learning_rate": 4.814750463330686e-05,
"loss": 1.088,
"step": 66000
},
{
"epoch": 17.49,
"learning_rate": 4.813095710881652e-05,
"loss": 1.0861,
"step": 66500
},
{
"epoch": 17.62,
"learning_rate": 4.8114442679375165e-05,
"loss": 1.0817,
"step": 67000
},
{
"epoch": 17.75,
"learning_rate": 4.809789515488483e-05,
"loss": 1.0783,
"step": 67500
},
{
"epoch": 17.89,
"learning_rate": 4.80813476303945e-05,
"loss": 1.075,
"step": 68000
},
{
"epoch": 18.02,
"learning_rate": 4.8064833200953144e-05,
"loss": 1.0672,
"step": 68500
},
{
"epoch": 18.15,
"learning_rate": 4.80482856764628e-05,
"loss": 1.0647,
"step": 69000
},
{
"epoch": 18.28,
"learning_rate": 4.803173815197247e-05,
"loss": 1.0618,
"step": 69500
},
{
"epoch": 18.41,
"learning_rate": 4.801519062748213e-05,
"loss": 1.057,
"step": 70000
},
{
"epoch": 18.54,
"learning_rate": 4.7998643102991796e-05,
"loss": 1.0528,
"step": 70500
},
{
"epoch": 18.67,
"learning_rate": 4.7982095578501455e-05,
"loss": 1.0503,
"step": 71000
},
{
"epoch": 18.81,
"learning_rate": 4.79655811490601e-05,
"loss": 1.0455,
"step": 71500
},
{
"epoch": 18.94,
"learning_rate": 4.794903362456977e-05,
"loss": 1.0429,
"step": 72000
},
{
"epoch": 19.07,
"learning_rate": 4.7932486100079434e-05,
"loss": 1.0371,
"step": 72500
},
{
"epoch": 19.2,
"learning_rate": 4.791593857558909e-05,
"loss": 1.0341,
"step": 73000
},
{
"epoch": 19.33,
"learning_rate": 4.789939105109876e-05,
"loss": 1.0291,
"step": 73500
},
{
"epoch": 19.46,
"learning_rate": 4.7882876621657405e-05,
"loss": 1.0261,
"step": 74000
},
{
"epoch": 19.59,
"learning_rate": 4.7866329097167065e-05,
"loss": 1.0241,
"step": 74500
},
{
"epoch": 19.73,
"learning_rate": 4.7849781572676724e-05,
"loss": 1.02,
"step": 75000
},
{
"epoch": 19.86,
"learning_rate": 4.783323404818639e-05,
"loss": 1.018,
"step": 75500
},
{
"epoch": 19.99,
"learning_rate": 4.781668652369606e-05,
"loss": 1.0154,
"step": 76000
},
{
"epoch": 20.12,
"learning_rate": 4.7800138999205724e-05,
"loss": 1.0116,
"step": 76500
},
{
"epoch": 20.25,
"learning_rate": 4.778359147471538e-05,
"loss": 1.0065,
"step": 77000
},
{
"epoch": 20.38,
"learning_rate": 4.776704395022505e-05,
"loss": 1.005,
"step": 77500
},
{
"epoch": 20.52,
"learning_rate": 4.7750529520783695e-05,
"loss": 1.0025,
"step": 78000
},
{
"epoch": 20.65,
"learning_rate": 4.773398199629336e-05,
"loss": 1.0008,
"step": 78500
},
{
"epoch": 20.78,
"learning_rate": 4.771743447180302e-05,
"loss": 0.9965,
"step": 79000
},
{
"epoch": 20.91,
"learning_rate": 4.770088694731268e-05,
"loss": 0.9943,
"step": 79500
},
{
"epoch": 21.04,
"learning_rate": 4.7684372517871326e-05,
"loss": 0.9907,
"step": 80000
},
{
"epoch": 21.17,
"learning_rate": 4.766782499338099e-05,
"loss": 0.9882,
"step": 80500
},
{
"epoch": 21.3,
"learning_rate": 4.765127746889065e-05,
"loss": 0.9861,
"step": 81000
},
{
"epoch": 21.44,
"learning_rate": 4.763472994440032e-05,
"loss": 0.9809,
"step": 81500
},
{
"epoch": 21.57,
"learning_rate": 4.7618215514958964e-05,
"loss": 0.9793,
"step": 82000
},
{
"epoch": 21.7,
"learning_rate": 4.760166799046863e-05,
"loss": 0.9778,
"step": 82500
},
{
"epoch": 21.83,
"learning_rate": 4.7585153561027276e-05,
"loss": 0.9756,
"step": 83000
},
{
"epoch": 21.96,
"learning_rate": 4.7568606036536936e-05,
"loss": 0.9732,
"step": 83500
},
{
"epoch": 22.09,
"learning_rate": 4.7552058512046596e-05,
"loss": 0.97,
"step": 84000
},
{
"epoch": 22.23,
"learning_rate": 4.753551098755626e-05,
"loss": 0.9687,
"step": 84500
},
{
"epoch": 22.36,
"learning_rate": 4.751899655811491e-05,
"loss": 0.965,
"step": 85000
},
{
"epoch": 22.49,
"learning_rate": 4.7502449033624574e-05,
"loss": 0.9627,
"step": 85500
},
{
"epoch": 22.62,
"learning_rate": 4.7485901509134233e-05,
"loss": 0.9628,
"step": 86000
},
{
"epoch": 22.75,
"learning_rate": 4.74693539846439e-05,
"loss": 0.9605,
"step": 86500
},
{
"epoch": 22.88,
"learning_rate": 4.7452806460153566e-05,
"loss": 0.9565,
"step": 87000
},
{
"epoch": 23.01,
"learning_rate": 4.7436258935663226e-05,
"loss": 0.9553,
"step": 87500
},
{
"epoch": 23.15,
"learning_rate": 4.741971141117289e-05,
"loss": 0.9518,
"step": 88000
},
{
"epoch": 23.28,
"learning_rate": 4.740316388668256e-05,
"loss": 0.9504,
"step": 88500
},
{
"epoch": 23.41,
"learning_rate": 4.73866494572412e-05,
"loss": 0.9482,
"step": 89000
},
{
"epoch": 23.54,
"learning_rate": 4.7370101932750864e-05,
"loss": 0.946,
"step": 89500
},
{
"epoch": 23.67,
"learning_rate": 4.7353554408260523e-05,
"loss": 0.9442,
"step": 90000
},
{
"epoch": 23.8,
"learning_rate": 4.733700688377019e-05,
"loss": 0.9428,
"step": 90500
},
{
"epoch": 23.93,
"learning_rate": 4.7320492454328835e-05,
"loss": 0.9421,
"step": 91000
},
{
"epoch": 24.07,
"learning_rate": 4.73039449298385e-05,
"loss": 0.9389,
"step": 91500
},
{
"epoch": 24.2,
"learning_rate": 4.728739740534816e-05,
"loss": 0.9374,
"step": 92000
},
{
"epoch": 24.33,
"learning_rate": 4.727084988085783e-05,
"loss": 0.9337,
"step": 92500
},
{
"epoch": 24.46,
"learning_rate": 4.7254302356367494e-05,
"loss": 0.9332,
"step": 93000
},
{
"epoch": 24.59,
"learning_rate": 4.7237754831877154e-05,
"loss": 0.931,
"step": 93500
},
{
"epoch": 24.72,
"learning_rate": 4.722124040243579e-05,
"loss": 0.9301,
"step": 94000
},
{
"epoch": 24.86,
"learning_rate": 4.720469287794546e-05,
"loss": 0.9296,
"step": 94500
},
{
"epoch": 24.99,
"learning_rate": 4.7188145353455125e-05,
"loss": 0.926,
"step": 95000
}
],
"max_steps": 1520800,
"num_train_epochs": 400,
"total_flos": 2.561490423429831e+19,
"trial_name": null,
"trial_params": null
}