{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 60130, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 4.9584234159321475e-05, "loss": 0.6379, "step": 500 }, { "epoch": 0.17, "learning_rate": 4.916846831864294e-05, "loss": 0.4095, "step": 1000 }, { "epoch": 0.25, "learning_rate": 4.875270247796441e-05, "loss": 0.3395, "step": 1500 }, { "epoch": 0.33, "learning_rate": 4.8336936637285885e-05, "loss": 0.3274, "step": 2000 }, { "epoch": 0.42, "learning_rate": 4.792117079660735e-05, "loss": 0.3223, "step": 2500 }, { "epoch": 0.5, "learning_rate": 4.750540495592882e-05, "loss": 0.3417, "step": 3000 }, { "epoch": 0.58, "learning_rate": 4.7089639115250295e-05, "loss": 0.3072, "step": 3500 }, { "epoch": 0.67, "learning_rate": 4.667387327457176e-05, "loss": 0.2396, "step": 4000 }, { "epoch": 0.75, "learning_rate": 4.625810743389323e-05, "loss": 0.2277, "step": 4500 }, { "epoch": 0.83, "learning_rate": 4.5842341593214705e-05, "loss": 0.224, "step": 5000 }, { "epoch": 0.91, "learning_rate": 4.542657575253617e-05, "loss": 0.2246, "step": 5500 }, { "epoch": 1.0, "learning_rate": 4.501080991185764e-05, "loss": 0.2104, "step": 6000 }, { "epoch": 1.08, "learning_rate": 4.4595044071179116e-05, "loss": 0.183, "step": 6500 }, { "epoch": 1.16, "learning_rate": 4.417927823050058e-05, "loss": 0.2578, "step": 7000 }, { "epoch": 1.25, "learning_rate": 4.3763512389822053e-05, "loss": 0.2557, "step": 7500 }, { "epoch": 1.33, "learning_rate": 4.3347746549143526e-05, "loss": 0.2352, "step": 8000 }, { "epoch": 1.41, "learning_rate": 4.2931980708465e-05, "loss": 0.2227, "step": 8500 }, { "epoch": 1.5, "learning_rate": 4.2516214867786464e-05, "loss": 0.1994, "step": 9000 }, { "epoch": 1.58, "learning_rate": 4.2100449027107936e-05, "loss": 0.2145, "step": 9500 }, { "epoch": 1.66, "learning_rate": 4.168468318642941e-05, "loss": 0.1487, "step": 10000 }, { "epoch": 1.75, "learning_rate": 4.1268917345750874e-05, "loss": 0.159, "step": 10500 }, { "epoch": 1.83, "learning_rate": 4.0853151505072346e-05, "loss": 0.1803, "step": 11000 }, { "epoch": 1.91, "learning_rate": 4.043738566439382e-05, "loss": 0.1679, "step": 11500 }, { "epoch": 2.0, "learning_rate": 4.0021619823715284e-05, "loss": 0.1332, "step": 12000 }, { "epoch": 2.08, "learning_rate": 3.9605853983036756e-05, "loss": 0.1288, "step": 12500 }, { "epoch": 2.16, "learning_rate": 3.919008814235823e-05, "loss": 0.1164, "step": 13000 }, { "epoch": 2.25, "learning_rate": 3.8774322301679694e-05, "loss": 0.1237, "step": 13500 }, { "epoch": 2.33, "learning_rate": 3.8358556461001167e-05, "loss": 0.1533, "step": 14000 }, { "epoch": 2.41, "learning_rate": 3.794279062032264e-05, "loss": 0.1282, "step": 14500 }, { "epoch": 2.49, "learning_rate": 3.7527024779644104e-05, "loss": 0.5156, "step": 15000 }, { "epoch": 2.58, "learning_rate": 3.711125893896558e-05, "loss": 0.1689, "step": 15500 }, { "epoch": 2.66, "learning_rate": 3.669549309828705e-05, "loss": 0.2002, "step": 16000 }, { "epoch": 2.74, "learning_rate": 3.6279727257608515e-05, "loss": 0.1226, "step": 16500 }, { "epoch": 2.83, "learning_rate": 3.586396141692999e-05, "loss": 0.1192, "step": 17000 }, { "epoch": 2.91, "learning_rate": 3.544819557625146e-05, "loss": 0.1264, "step": 17500 }, { "epoch": 2.99, "learning_rate": 3.5032429735572925e-05, "loss": 0.138, "step": 18000 }, { "epoch": 3.08, "learning_rate": 3.46166638948944e-05, "loss": 0.116, "step": 18500 }, { "epoch": 3.16, "learning_rate": 3.420089805421587e-05, "loss": 0.168, "step": 19000 }, { "epoch": 3.24, "learning_rate": 3.3785132213537335e-05, "loss": 0.0882, "step": 19500 }, { "epoch": 3.33, "learning_rate": 3.336936637285881e-05, "loss": 0.0959, "step": 20000 }, { "epoch": 3.41, "learning_rate": 3.295360053218028e-05, "loss": 0.0865, "step": 20500 }, { "epoch": 3.49, "learning_rate": 3.2537834691501745e-05, "loss": 0.0853, "step": 21000 }, { "epoch": 3.58, "learning_rate": 3.212206885082322e-05, "loss": 0.0783, "step": 21500 }, { "epoch": 3.66, "learning_rate": 3.170630301014469e-05, "loss": 0.0671, "step": 22000 }, { "epoch": 3.74, "learning_rate": 3.129053716946616e-05, "loss": 0.0762, "step": 22500 }, { "epoch": 3.83, "learning_rate": 3.087477132878763e-05, "loss": 0.0759, "step": 23000 }, { "epoch": 3.91, "learning_rate": 3.04590054881091e-05, "loss": 0.0786, "step": 23500 }, { "epoch": 3.99, "learning_rate": 3.0043239647430572e-05, "loss": 0.0696, "step": 24000 }, { "epoch": 4.07, "learning_rate": 2.9627473806752038e-05, "loss": 0.0561, "step": 24500 }, { "epoch": 4.16, "learning_rate": 2.921170796607351e-05, "loss": 0.0567, "step": 25000 }, { "epoch": 4.24, "learning_rate": 2.8795942125394983e-05, "loss": 0.0553, "step": 25500 }, { "epoch": 4.32, "learning_rate": 2.8380176284716448e-05, "loss": 0.0618, "step": 26000 }, { "epoch": 4.41, "learning_rate": 2.796441044403792e-05, "loss": 0.0601, "step": 26500 }, { "epoch": 4.49, "learning_rate": 2.7548644603359393e-05, "loss": 0.0572, "step": 27000 }, { "epoch": 4.57, "learning_rate": 2.713287876268086e-05, "loss": 0.055, "step": 27500 }, { "epoch": 4.66, "learning_rate": 2.671711292200233e-05, "loss": 0.0592, "step": 28000 }, { "epoch": 4.74, "learning_rate": 2.6301347081323803e-05, "loss": 0.0582, "step": 28500 }, { "epoch": 4.82, "learning_rate": 2.588558124064527e-05, "loss": 0.0605, "step": 29000 }, { "epoch": 4.91, "learning_rate": 2.546981539996674e-05, "loss": 0.0511, "step": 29500 }, { "epoch": 4.99, "learning_rate": 2.5054049559288213e-05, "loss": 0.052, "step": 30000 }, { "epoch": 5.07, "learning_rate": 2.4638283718609682e-05, "loss": 0.0387, "step": 30500 }, { "epoch": 5.16, "learning_rate": 2.422251787793115e-05, "loss": 0.0408, "step": 31000 }, { "epoch": 5.24, "learning_rate": 2.380675203725262e-05, "loss": 0.038, "step": 31500 }, { "epoch": 5.32, "learning_rate": 2.3390986196574092e-05, "loss": 0.0415, "step": 32000 }, { "epoch": 5.4, "learning_rate": 2.297522035589556e-05, "loss": 0.0529, "step": 32500 }, { "epoch": 5.49, "learning_rate": 2.2559454515217034e-05, "loss": 0.0436, "step": 33000 }, { "epoch": 5.57, "learning_rate": 2.2143688674538503e-05, "loss": 0.0388, "step": 33500 }, { "epoch": 5.65, "learning_rate": 2.172792283385997e-05, "loss": 0.0362, "step": 34000 }, { "epoch": 5.74, "learning_rate": 2.1312156993181444e-05, "loss": 0.0369, "step": 34500 }, { "epoch": 5.82, "learning_rate": 2.0896391152502913e-05, "loss": 0.0385, "step": 35000 }, { "epoch": 5.9, "learning_rate": 2.048062531182438e-05, "loss": 0.0391, "step": 35500 }, { "epoch": 5.99, "learning_rate": 2.0064859471145854e-05, "loss": 0.0395, "step": 36000 }, { "epoch": 6.07, "learning_rate": 1.9649093630467323e-05, "loss": 0.0262, "step": 36500 }, { "epoch": 6.15, "learning_rate": 1.9233327789788792e-05, "loss": 0.0473, "step": 37000 }, { "epoch": 6.24, "learning_rate": 1.8817561949110264e-05, "loss": 0.0306, "step": 37500 }, { "epoch": 6.32, "learning_rate": 1.8401796108431733e-05, "loss": 0.0258, "step": 38000 }, { "epoch": 6.4, "learning_rate": 1.7986030267753202e-05, "loss": 0.0284, "step": 38500 }, { "epoch": 6.49, "learning_rate": 1.7570264427074674e-05, "loss": 0.0223, "step": 39000 }, { "epoch": 6.57, "learning_rate": 1.7154498586396143e-05, "loss": 0.0259, "step": 39500 }, { "epoch": 6.65, "learning_rate": 1.6738732745717612e-05, "loss": 0.0282, "step": 40000 }, { "epoch": 6.74, "learning_rate": 1.6322966905039085e-05, "loss": 0.024, "step": 40500 }, { "epoch": 6.82, "learning_rate": 1.5907201064360554e-05, "loss": 0.026, "step": 41000 }, { "epoch": 6.9, "learning_rate": 1.5491435223682026e-05, "loss": 0.0263, "step": 41500 }, { "epoch": 6.98, "learning_rate": 1.5075669383003493e-05, "loss": 0.0257, "step": 42000 }, { "epoch": 7.07, "learning_rate": 1.4659903542324962e-05, "loss": 0.0272, "step": 42500 }, { "epoch": 7.15, "learning_rate": 1.4244137701646434e-05, "loss": 0.0194, "step": 43000 }, { "epoch": 7.23, "learning_rate": 1.3828371860967903e-05, "loss": 0.014, "step": 43500 }, { "epoch": 7.32, "learning_rate": 1.3412606020289372e-05, "loss": 0.0151, "step": 44000 }, { "epoch": 7.4, "learning_rate": 1.2996840179610845e-05, "loss": 0.0173, "step": 44500 }, { "epoch": 7.48, "learning_rate": 1.2581074338932313e-05, "loss": 0.0138, "step": 45000 }, { "epoch": 7.57, "learning_rate": 1.2165308498253784e-05, "loss": 0.0245, "step": 45500 }, { "epoch": 7.65, "learning_rate": 1.1749542657575255e-05, "loss": 0.0162, "step": 46000 }, { "epoch": 7.73, "learning_rate": 1.1333776816896724e-05, "loss": 0.0162, "step": 46500 }, { "epoch": 7.82, "learning_rate": 1.0918010976218194e-05, "loss": 0.0158, "step": 47000 }, { "epoch": 7.9, "learning_rate": 1.0502245135539665e-05, "loss": 0.0186, "step": 47500 }, { "epoch": 7.98, "learning_rate": 1.0086479294861134e-05, "loss": 0.0189, "step": 48000 }, { "epoch": 8.07, "learning_rate": 9.670713454182605e-06, "loss": 0.0138, "step": 48500 }, { "epoch": 8.15, "learning_rate": 9.254947613504075e-06, "loss": 0.0076, "step": 49000 }, { "epoch": 8.23, "learning_rate": 8.839181772825546e-06, "loss": 0.0109, "step": 49500 }, { "epoch": 8.32, "learning_rate": 8.423415932147015e-06, "loss": 0.0107, "step": 50000 }, { "epoch": 8.4, "learning_rate": 8.007650091468485e-06, "loss": 0.0086, "step": 50500 }, { "epoch": 8.48, "learning_rate": 7.591884250789956e-06, "loss": 0.0132, "step": 51000 }, { "epoch": 8.56, "learning_rate": 7.176118410111425e-06, "loss": 0.0095, "step": 51500 }, { "epoch": 8.65, "learning_rate": 6.7603525694328956e-06, "loss": 0.0101, "step": 52000 }, { "epoch": 8.73, "learning_rate": 6.344586728754366e-06, "loss": 0.0108, "step": 52500 }, { "epoch": 8.81, "learning_rate": 5.928820888075836e-06, "loss": 0.0088, "step": 53000 }, { "epoch": 8.9, "learning_rate": 5.513055047397306e-06, "loss": 0.0082, "step": 53500 }, { "epoch": 8.98, "learning_rate": 5.097289206718776e-06, "loss": 0.0086, "step": 54000 }, { "epoch": 9.06, "learning_rate": 4.681523366040246e-06, "loss": 0.0053, "step": 54500 }, { "epoch": 9.15, "learning_rate": 4.265757525361717e-06, "loss": 0.0053, "step": 55000 }, { "epoch": 9.23, "learning_rate": 3.849991684683187e-06, "loss": 0.0066, "step": 55500 }, { "epoch": 9.31, "learning_rate": 3.4342258440046572e-06, "loss": 0.0041, "step": 56000 }, { "epoch": 9.4, "learning_rate": 3.018460003326127e-06, "loss": 0.0047, "step": 56500 }, { "epoch": 9.48, "learning_rate": 2.6026941626475972e-06, "loss": 0.0065, "step": 57000 }, { "epoch": 9.56, "learning_rate": 2.1869283219690674e-06, "loss": 0.0059, "step": 57500 }, { "epoch": 9.65, "learning_rate": 1.7711624812905372e-06, "loss": 0.0066, "step": 58000 }, { "epoch": 9.73, "learning_rate": 1.3553966406120074e-06, "loss": 0.0043, "step": 58500 }, { "epoch": 9.81, "learning_rate": 9.396307999334775e-07, "loss": 0.0055, "step": 59000 }, { "epoch": 9.9, "learning_rate": 5.238649592549476e-07, "loss": 0.0042, "step": 59500 }, { "epoch": 9.98, "learning_rate": 1.0809911857641776e-07, "loss": 0.0031, "step": 60000 }, { "epoch": 10.0, "step": 60130, "total_flos": 2.8632348630306816e+17, "train_runtime": 40850.056, "train_samples_per_second": 1.472 } ], "max_steps": 60130, "num_train_epochs": 10, "total_flos": 2.8632348630306816e+17, "trial_name": null, "trial_params": null }