{ "best_metric": null, "best_model_checkpoint": null, "epoch": 24.43494196701283, "global_step": 80000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15, "learning_rate": 1.993891264508247e-05, "loss": 0.4281, "step": 500 }, { "epoch": 0.31, "learning_rate": 1.987782529016494e-05, "loss": 0.3603, "step": 1000 }, { "epoch": 0.46, "learning_rate": 1.9816737935247404e-05, "loss": 0.335, "step": 1500 }, { "epoch": 0.61, "learning_rate": 1.9755650580329873e-05, "loss": 0.324, "step": 2000 }, { "epoch": 0.76, "learning_rate": 1.9694563225412342e-05, "loss": 0.3191, "step": 2500 }, { "epoch": 0.92, "learning_rate": 1.963347587049481e-05, "loss": 0.3027, "step": 3000 }, { "epoch": 1.07, "learning_rate": 1.9572388515577276e-05, "loss": 0.2698, "step": 3500 }, { "epoch": 1.22, "learning_rate": 1.9511301160659744e-05, "loss": 0.2289, "step": 4000 }, { "epoch": 1.37, "learning_rate": 1.9450213805742213e-05, "loss": 0.2299, "step": 4500 }, { "epoch": 1.53, "learning_rate": 1.938912645082468e-05, "loss": 0.2369, "step": 5000 }, { "epoch": 1.68, "learning_rate": 1.932803909590715e-05, "loss": 0.241, "step": 5500 }, { "epoch": 1.83, "learning_rate": 1.9266951740989616e-05, "loss": 0.2375, "step": 6000 }, { "epoch": 1.99, "learning_rate": 1.9205864386072085e-05, "loss": 0.2334, "step": 6500 }, { "epoch": 2.14, "learning_rate": 1.9144777031154553e-05, "loss": 0.1654, "step": 7000 }, { "epoch": 2.29, "learning_rate": 1.9083689676237022e-05, "loss": 0.1569, "step": 7500 }, { "epoch": 2.44, "learning_rate": 1.9022602321319487e-05, "loss": 0.1689, "step": 8000 }, { "epoch": 2.6, "learning_rate": 1.8961514966401956e-05, "loss": 0.1641, "step": 8500 }, { "epoch": 2.75, "learning_rate": 1.8900427611484425e-05, "loss": 0.1654, "step": 9000 }, { "epoch": 2.9, "learning_rate": 1.883934025656689e-05, "loss": 0.1684, "step": 9500 }, { "epoch": 3.05, "learning_rate": 1.8778252901649362e-05, "loss": 0.1484, "step": 10000 }, { "epoch": 3.21, "learning_rate": 1.8717165546731827e-05, "loss": 0.1118, "step": 10500 }, { "epoch": 3.36, "learning_rate": 1.8656078191814296e-05, "loss": 0.1181, "step": 11000 }, { "epoch": 3.51, "learning_rate": 1.8594990836896765e-05, "loss": 0.118, "step": 11500 }, { "epoch": 3.67, "learning_rate": 1.853390348197923e-05, "loss": 0.1204, "step": 12000 }, { "epoch": 3.82, "learning_rate": 1.84728161270617e-05, "loss": 0.1255, "step": 12500 }, { "epoch": 3.97, "learning_rate": 1.8411728772144168e-05, "loss": 0.1284, "step": 13000 }, { "epoch": 4.12, "learning_rate": 1.8350641417226636e-05, "loss": 0.0925, "step": 13500 }, { "epoch": 4.28, "learning_rate": 1.82895540623091e-05, "loss": 0.0896, "step": 14000 }, { "epoch": 4.43, "learning_rate": 1.8228466707391574e-05, "loss": 0.0897, "step": 14500 }, { "epoch": 4.58, "learning_rate": 1.816737935247404e-05, "loss": 0.0969, "step": 15000 }, { "epoch": 4.73, "learning_rate": 1.8106291997556508e-05, "loss": 0.0971, "step": 15500 }, { "epoch": 4.89, "learning_rate": 1.8045204642638976e-05, "loss": 0.0963, "step": 16000 }, { "epoch": 5.04, "learning_rate": 1.7984117287721442e-05, "loss": 0.0906, "step": 16500 }, { "epoch": 5.19, "learning_rate": 1.792302993280391e-05, "loss": 0.0717, "step": 17000 }, { "epoch": 5.35, "learning_rate": 1.786194257788638e-05, "loss": 0.0754, "step": 17500 }, { "epoch": 5.5, "learning_rate": 1.7800855222968848e-05, "loss": 0.0709, "step": 18000 }, { "epoch": 5.65, "learning_rate": 1.7739767868051313e-05, "loss": 0.0814, "step": 18500 }, { "epoch": 5.8, "learning_rate": 1.7678680513133785e-05, "loss": 0.0797, "step": 19000 }, { "epoch": 5.96, "learning_rate": 1.761759315821625e-05, "loss": 0.0914, "step": 19500 }, { "epoch": 6.11, "learning_rate": 1.755650580329872e-05, "loss": 0.0648, "step": 20000 }, { "epoch": 6.26, "learning_rate": 1.7495418448381188e-05, "loss": 0.0627, "step": 20500 }, { "epoch": 6.41, "learning_rate": 1.7434331093463653e-05, "loss": 0.061, "step": 21000 }, { "epoch": 6.57, "learning_rate": 1.7373243738546122e-05, "loss": 0.0678, "step": 21500 }, { "epoch": 6.72, "learning_rate": 1.731215638362859e-05, "loss": 0.0656, "step": 22000 }, { "epoch": 6.87, "learning_rate": 1.725106902871106e-05, "loss": 0.0702, "step": 22500 }, { "epoch": 7.03, "learning_rate": 1.7189981673793525e-05, "loss": 0.0716, "step": 23000 }, { "epoch": 7.18, "learning_rate": 1.7128894318875993e-05, "loss": 0.0553, "step": 23500 }, { "epoch": 7.33, "learning_rate": 1.7067806963958462e-05, "loss": 0.0546, "step": 24000 }, { "epoch": 7.48, "learning_rate": 1.700671960904093e-05, "loss": 0.0575, "step": 24500 }, { "epoch": 7.64, "learning_rate": 1.69456322541234e-05, "loss": 0.0558, "step": 25000 }, { "epoch": 7.79, "learning_rate": 1.6884544899205865e-05, "loss": 0.0628, "step": 25500 }, { "epoch": 7.94, "learning_rate": 1.6823457544288334e-05, "loss": 0.06, "step": 26000 }, { "epoch": 8.09, "learning_rate": 1.6762370189370802e-05, "loss": 0.0501, "step": 26500 }, { "epoch": 8.25, "learning_rate": 1.670128283445327e-05, "loss": 0.0465, "step": 27000 }, { "epoch": 8.4, "learning_rate": 1.6640195479535736e-05, "loss": 0.0497, "step": 27500 }, { "epoch": 8.55, "learning_rate": 1.6579108124618205e-05, "loss": 0.0513, "step": 28000 }, { "epoch": 8.7, "learning_rate": 1.6518020769700674e-05, "loss": 0.0526, "step": 28500 }, { "epoch": 8.86, "learning_rate": 1.645693341478314e-05, "loss": 0.0503, "step": 29000 }, { "epoch": 9.01, "learning_rate": 1.639584605986561e-05, "loss": 0.05, "step": 29500 }, { "epoch": 9.16, "learning_rate": 1.6334758704948076e-05, "loss": 0.0371, "step": 30000 }, { "epoch": 9.32, "learning_rate": 1.6273671350030545e-05, "loss": 0.0396, "step": 30500 }, { "epoch": 9.47, "learning_rate": 1.6212583995113014e-05, "loss": 0.0417, "step": 31000 }, { "epoch": 9.62, "learning_rate": 1.6151496640195482e-05, "loss": 0.0453, "step": 31500 }, { "epoch": 9.77, "learning_rate": 1.6090409285277948e-05, "loss": 0.0402, "step": 32000 }, { "epoch": 9.93, "learning_rate": 1.6029321930360416e-05, "loss": 0.0421, "step": 32500 }, { "epoch": 10.08, "learning_rate": 1.5968234575442885e-05, "loss": 0.0389, "step": 33000 }, { "epoch": 10.23, "learning_rate": 1.590714722052535e-05, "loss": 0.0372, "step": 33500 }, { "epoch": 10.38, "learning_rate": 1.5846059865607823e-05, "loss": 0.0346, "step": 34000 }, { "epoch": 10.54, "learning_rate": 1.5784972510690288e-05, "loss": 0.0397, "step": 34500 }, { "epoch": 10.69, "learning_rate": 1.5723885155772757e-05, "loss": 0.0357, "step": 35000 }, { "epoch": 10.84, "learning_rate": 1.5662797800855225e-05, "loss": 0.032, "step": 35500 }, { "epoch": 11.0, "learning_rate": 1.560171044593769e-05, "loss": 0.0474, "step": 36000 }, { "epoch": 11.15, "learning_rate": 1.554062309102016e-05, "loss": 0.0298, "step": 36500 }, { "epoch": 11.3, "learning_rate": 1.5479535736102628e-05, "loss": 0.0355, "step": 37000 }, { "epoch": 11.45, "learning_rate": 1.5418448381185097e-05, "loss": 0.0368, "step": 37500 }, { "epoch": 11.61, "learning_rate": 1.5357361026267562e-05, "loss": 0.0335, "step": 38000 }, { "epoch": 11.76, "learning_rate": 1.5296273671350034e-05, "loss": 0.0328, "step": 38500 }, { "epoch": 11.91, "learning_rate": 1.52351863164325e-05, "loss": 0.0317, "step": 39000 }, { "epoch": 12.06, "learning_rate": 1.5174098961514966e-05, "loss": 0.0346, "step": 39500 }, { "epoch": 12.22, "learning_rate": 1.5113011606597437e-05, "loss": 0.0279, "step": 40000 }, { "epoch": 12.37, "learning_rate": 1.5051924251679904e-05, "loss": 0.0268, "step": 40500 }, { "epoch": 12.52, "learning_rate": 1.4990836896762371e-05, "loss": 0.0324, "step": 41000 }, { "epoch": 12.68, "learning_rate": 1.492974954184484e-05, "loss": 0.0309, "step": 41500 }, { "epoch": 12.83, "learning_rate": 1.4868662186927307e-05, "loss": 0.0316, "step": 42000 }, { "epoch": 12.98, "learning_rate": 1.4807574832009775e-05, "loss": 0.0365, "step": 42500 }, { "epoch": 13.13, "learning_rate": 1.4746487477092244e-05, "loss": 0.0241, "step": 43000 }, { "epoch": 13.29, "learning_rate": 1.4685400122174711e-05, "loss": 0.0276, "step": 43500 }, { "epoch": 13.44, "learning_rate": 1.4624312767257178e-05, "loss": 0.0249, "step": 44000 }, { "epoch": 13.59, "learning_rate": 1.4563225412339648e-05, "loss": 0.0284, "step": 44500 }, { "epoch": 13.74, "learning_rate": 1.4502138057422115e-05, "loss": 0.0277, "step": 45000 }, { "epoch": 13.9, "learning_rate": 1.4441050702504582e-05, "loss": 0.0296, "step": 45500 }, { "epoch": 14.05, "learning_rate": 1.4379963347587051e-05, "loss": 0.0245, "step": 46000 }, { "epoch": 14.2, "learning_rate": 1.4318875992669518e-05, "loss": 0.0227, "step": 46500 }, { "epoch": 14.36, "learning_rate": 1.4257788637751985e-05, "loss": 0.0273, "step": 47000 }, { "epoch": 14.51, "learning_rate": 1.4196701282834456e-05, "loss": 0.0263, "step": 47500 }, { "epoch": 14.66, "learning_rate": 1.4135613927916923e-05, "loss": 0.0243, "step": 48000 }, { "epoch": 14.81, "learning_rate": 1.407452657299939e-05, "loss": 0.0256, "step": 48500 }, { "epoch": 14.97, "learning_rate": 1.4013439218081858e-05, "loss": 0.0285, "step": 49000 }, { "epoch": 15.12, "learning_rate": 1.3952351863164327e-05, "loss": 0.0189, "step": 49500 }, { "epoch": 15.27, "learning_rate": 1.3891264508246794e-05, "loss": 0.0186, "step": 50000 }, { "epoch": 15.42, "learning_rate": 1.3830177153329263e-05, "loss": 0.0272, "step": 50500 }, { "epoch": 15.58, "learning_rate": 1.376908979841173e-05, "loss": 0.0264, "step": 51000 }, { "epoch": 15.73, "learning_rate": 1.3708002443494197e-05, "loss": 0.0239, "step": 51500 }, { "epoch": 15.88, "learning_rate": 1.3646915088576667e-05, "loss": 0.0255, "step": 52000 }, { "epoch": 16.04, "learning_rate": 1.3585827733659134e-05, "loss": 0.0261, "step": 52500 }, { "epoch": 16.19, "learning_rate": 1.3524740378741601e-05, "loss": 0.0239, "step": 53000 }, { "epoch": 16.34, "learning_rate": 1.346365302382407e-05, "loss": 0.0187, "step": 53500 }, { "epoch": 16.49, "learning_rate": 1.3402565668906537e-05, "loss": 0.023, "step": 54000 }, { "epoch": 16.65, "learning_rate": 1.3341478313989005e-05, "loss": 0.0215, "step": 54500 }, { "epoch": 16.8, "learning_rate": 1.3280390959071474e-05, "loss": 0.0264, "step": 55000 }, { "epoch": 16.95, "learning_rate": 1.3219303604153941e-05, "loss": 0.0198, "step": 55500 }, { "epoch": 17.1, "learning_rate": 1.3158216249236408e-05, "loss": 0.0191, "step": 56000 }, { "epoch": 17.26, "learning_rate": 1.3097128894318879e-05, "loss": 0.0174, "step": 56500 }, { "epoch": 17.41, "learning_rate": 1.3036041539401346e-05, "loss": 0.0149, "step": 57000 }, { "epoch": 17.56, "learning_rate": 1.2974954184483813e-05, "loss": 0.019, "step": 57500 }, { "epoch": 17.72, "learning_rate": 1.2913866829566281e-05, "loss": 0.0188, "step": 58000 }, { "epoch": 17.87, "learning_rate": 1.2852779474648748e-05, "loss": 0.022, "step": 58500 }, { "epoch": 18.02, "learning_rate": 1.2791692119731215e-05, "loss": 0.0248, "step": 59000 }, { "epoch": 18.17, "learning_rate": 1.2730604764813686e-05, "loss": 0.0162, "step": 59500 }, { "epoch": 18.33, "learning_rate": 1.2669517409896153e-05, "loss": 0.017, "step": 60000 }, { "epoch": 18.48, "learning_rate": 1.260843005497862e-05, "loss": 0.0149, "step": 60500 }, { "epoch": 18.63, "learning_rate": 1.2547342700061088e-05, "loss": 0.0194, "step": 61000 }, { "epoch": 18.78, "learning_rate": 1.2486255345143557e-05, "loss": 0.0185, "step": 61500 }, { "epoch": 18.94, "learning_rate": 1.2425167990226024e-05, "loss": 0.0161, "step": 62000 }, { "epoch": 19.09, "learning_rate": 1.2364080635308493e-05, "loss": 0.0144, "step": 62500 }, { "epoch": 19.24, "learning_rate": 1.230299328039096e-05, "loss": 0.0141, "step": 63000 }, { "epoch": 19.4, "learning_rate": 1.2241905925473427e-05, "loss": 0.0149, "step": 63500 }, { "epoch": 19.55, "learning_rate": 1.2180818570555897e-05, "loss": 0.0178, "step": 64000 }, { "epoch": 19.7, "learning_rate": 1.2119731215638364e-05, "loss": 0.0159, "step": 64500 }, { "epoch": 19.85, "learning_rate": 1.2058643860720831e-05, "loss": 0.0194, "step": 65000 }, { "epoch": 20.01, "learning_rate": 1.19975565058033e-05, "loss": 0.0196, "step": 65500 }, { "epoch": 20.16, "learning_rate": 1.1936469150885767e-05, "loss": 0.0123, "step": 66000 }, { "epoch": 20.31, "learning_rate": 1.1875381795968236e-05, "loss": 0.0133, "step": 66500 }, { "epoch": 20.46, "learning_rate": 1.1814294441050704e-05, "loss": 0.0188, "step": 67000 }, { "epoch": 20.62, "learning_rate": 1.1753207086133171e-05, "loss": 0.0141, "step": 67500 }, { "epoch": 20.77, "learning_rate": 1.1692119731215638e-05, "loss": 0.0176, "step": 68000 }, { "epoch": 20.92, "learning_rate": 1.1631032376298109e-05, "loss": 0.0151, "step": 68500 }, { "epoch": 21.08, "learning_rate": 1.1569945021380576e-05, "loss": 0.0151, "step": 69000 }, { "epoch": 21.23, "learning_rate": 1.1508857666463043e-05, "loss": 0.0085, "step": 69500 }, { "epoch": 21.38, "learning_rate": 1.1447770311545512e-05, "loss": 0.0165, "step": 70000 }, { "epoch": 21.53, "learning_rate": 1.1386682956627979e-05, "loss": 0.015, "step": 70500 }, { "epoch": 21.69, "learning_rate": 1.1325595601710446e-05, "loss": 0.0165, "step": 71000 }, { "epoch": 21.84, "learning_rate": 1.1264508246792916e-05, "loss": 0.0139, "step": 71500 }, { "epoch": 21.99, "learning_rate": 1.1203420891875383e-05, "loss": 0.0152, "step": 72000 }, { "epoch": 22.14, "learning_rate": 1.114233353695785e-05, "loss": 0.0116, "step": 72500 }, { "epoch": 22.3, "learning_rate": 1.1081246182040319e-05, "loss": 0.0134, "step": 73000 }, { "epoch": 22.45, "learning_rate": 1.1020158827122787e-05, "loss": 0.0146, "step": 73500 }, { "epoch": 22.6, "learning_rate": 1.0959071472205254e-05, "loss": 0.0148, "step": 74000 }, { "epoch": 22.76, "learning_rate": 1.0897984117287723e-05, "loss": 0.0124, "step": 74500 }, { "epoch": 22.91, "learning_rate": 1.083689676237019e-05, "loss": 0.013, "step": 75000 }, { "epoch": 23.06, "learning_rate": 1.0775809407452657e-05, "loss": 0.0124, "step": 75500 }, { "epoch": 23.21, "learning_rate": 1.0714722052535128e-05, "loss": 0.0092, "step": 76000 }, { "epoch": 23.37, "learning_rate": 1.0653634697617595e-05, "loss": 0.014, "step": 76500 }, { "epoch": 23.52, "learning_rate": 1.0592547342700062e-05, "loss": 0.0124, "step": 77000 }, { "epoch": 23.67, "learning_rate": 1.053145998778253e-05, "loss": 0.0116, "step": 77500 }, { "epoch": 23.82, "learning_rate": 1.0470372632864997e-05, "loss": 0.01, "step": 78000 }, { "epoch": 23.98, "learning_rate": 1.0409285277947466e-05, "loss": 0.0146, "step": 78500 }, { "epoch": 24.13, "learning_rate": 1.0348197923029935e-05, "loss": 0.011, "step": 79000 }, { "epoch": 24.28, "learning_rate": 1.0287110568112402e-05, "loss": 0.0101, "step": 79500 }, { "epoch": 24.43, "learning_rate": 1.0226023213194869e-05, "loss": 0.009, "step": 80000 } ], "max_steps": 163700, "num_train_epochs": 50, "total_flos": 2.152034161253376e+17, "trial_name": null, "trial_params": null }