{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.4825796886582654, "eval_steps": 500, "global_step": 1500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "grad_norm": 1.339092493057251, "learning_rate": 5.6012058970266934e-05, "loss": 1.6822, "max_memory_allocated (GB)": 91.88, "memory_allocated (GB)": 14.99, "step": 10, "total_memory_available (GB)": 94.62 }, { "epoch": 0.02, "grad_norm": 1.4976561069488525, "learning_rate": 7.287336883921704e-05, "loss": 1.3895, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 14.99, "step": 20, "total_memory_available (GB)": 94.62 }, { "epoch": 0.03, "grad_norm": 0.5813677310943604, "learning_rate": 8.273660282559241e-05, "loss": 1.2399, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 30, "total_memory_available (GB)": 94.62 }, { "epoch": 0.04, "grad_norm": 0.3463669717311859, "learning_rate": 8.973467870816715e-05, "loss": 1.2044, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 40, "total_memory_available (GB)": 94.62 }, { "epoch": 0.05, "grad_norm": 0.3429594039916992, "learning_rate": 9.516280807158375e-05, "loss": 1.1798, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 50, "total_memory_available (GB)": 94.62 }, { "epoch": 0.06, "grad_norm": 0.3272635042667389, "learning_rate": 9.959791269454252e-05, "loss": 1.153, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 60, "total_memory_available (GB)": 94.62 }, { "epoch": 0.07, "grad_norm": 0.37938210368156433, "learning_rate": 9.959204487506375e-05, "loss": 1.1266, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 70, "total_memory_available (GB)": 94.62 }, { "epoch": 0.08, "grad_norm": 0.4513859152793884, "learning_rate": 9.908210096889343e-05, "loss": 1.1218, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 80, "total_memory_available (GB)": 94.62 }, { "epoch": 0.09, "grad_norm": 0.5865214467048645, "learning_rate": 9.85721570627231e-05, "loss": 1.1048, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 90, "total_memory_available (GB)": 94.62 }, { "epoch": 0.1, "grad_norm": 0.6466606855392456, "learning_rate": 9.806221315655279e-05, "loss": 1.1064, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 100, "total_memory_available (GB)": 94.62 }, { "epoch": 0.11, "grad_norm": 0.5907604098320007, "learning_rate": 9.755226925038246e-05, "loss": 1.0716, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 110, "total_memory_available (GB)": 94.62 }, { "epoch": 0.12, "grad_norm": 0.7884001135826111, "learning_rate": 9.704232534421214e-05, "loss": 1.0656, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 120, "total_memory_available (GB)": 94.62 }, { "epoch": 0.13, "grad_norm": 0.7752586007118225, "learning_rate": 9.653238143804181e-05, "loss": 1.065, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 130, "total_memory_available (GB)": 94.62 }, { "epoch": 0.14, "grad_norm": 0.5188919901847839, "learning_rate": 9.60224375318715e-05, "loss": 1.0606, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 140, "total_memory_available (GB)": 94.62 }, { "epoch": 0.15, "grad_norm": 0.5289068818092346, "learning_rate": 9.551249362570118e-05, "loss": 1.0537, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 150, "total_memory_available (GB)": 94.62 }, { "epoch": 0.16, "grad_norm": 1.0006146430969238, "learning_rate": 9.500254971953085e-05, "loss": 1.0528, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 160, "total_memory_available (GB)": 94.62 }, { "epoch": 0.17, "grad_norm": 0.5318694710731506, "learning_rate": 9.449260581336054e-05, "loss": 1.0357, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 170, "total_memory_available (GB)": 94.62 }, { "epoch": 0.18, "grad_norm": 0.5409672260284424, "learning_rate": 9.398266190719021e-05, "loss": 1.0264, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 180, "total_memory_available (GB)": 94.62 }, { "epoch": 0.19, "grad_norm": 0.5338054299354553, "learning_rate": 9.347271800101989e-05, "loss": 1.0319, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 190, "total_memory_available (GB)": 94.62 }, { "epoch": 0.2, "grad_norm": 0.5304291844367981, "learning_rate": 9.296277409484956e-05, "loss": 1.0301, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 200, "total_memory_available (GB)": 94.62 }, { "epoch": 0.21, "grad_norm": 0.5799819231033325, "learning_rate": 9.245283018867925e-05, "loss": 1.0179, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 210, "total_memory_available (GB)": 94.62 }, { "epoch": 0.22, "grad_norm": 0.4919432997703552, "learning_rate": 9.194288628250894e-05, "loss": 1.0174, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 220, "total_memory_available (GB)": 94.62 }, { "epoch": 0.23, "grad_norm": 0.5090098977088928, "learning_rate": 9.14329423763386e-05, "loss": 1.0261, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 230, "total_memory_available (GB)": 94.62 }, { "epoch": 0.24, "grad_norm": 0.5532674193382263, "learning_rate": 9.092299847016829e-05, "loss": 1.0239, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 240, "total_memory_available (GB)": 94.62 }, { "epoch": 0.25, "grad_norm": 0.5546780824661255, "learning_rate": 9.041305456399796e-05, "loss": 1.0072, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 250, "total_memory_available (GB)": 94.62 }, { "epoch": 0.26, "grad_norm": 0.5483475923538208, "learning_rate": 8.990311065782764e-05, "loss": 1.0121, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 260, "total_memory_available (GB)": 94.62 }, { "epoch": 0.27, "grad_norm": 0.4962722063064575, "learning_rate": 8.939316675165733e-05, "loss": 1.0097, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 270, "total_memory_available (GB)": 94.62 }, { "epoch": 0.28, "grad_norm": 0.5032678842544556, "learning_rate": 8.8883222845487e-05, "loss": 1.0085, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 280, "total_memory_available (GB)": 94.62 }, { "epoch": 0.29, "grad_norm": 0.48048779368400574, "learning_rate": 8.837327893931669e-05, "loss": 1.006, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 290, "total_memory_available (GB)": 94.62 }, { "epoch": 0.3, "grad_norm": 0.493956595659256, "learning_rate": 8.786333503314635e-05, "loss": 0.9991, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 300, "total_memory_available (GB)": 94.62 }, { "epoch": 0.31, "grad_norm": 0.4832962155342102, "learning_rate": 8.735339112697604e-05, "loss": 0.9994, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 310, "total_memory_available (GB)": 94.62 }, { "epoch": 0.32, "grad_norm": 0.44359833002090454, "learning_rate": 8.684344722080571e-05, "loss": 0.9949, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 320, "total_memory_available (GB)": 94.62 }, { "epoch": 0.33, "grad_norm": 0.432824045419693, "learning_rate": 8.633350331463539e-05, "loss": 0.9945, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 330, "total_memory_available (GB)": 94.62 }, { "epoch": 0.34, "grad_norm": 0.5194958448410034, "learning_rate": 8.582355940846507e-05, "loss": 1.0005, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 340, "total_memory_available (GB)": 94.62 }, { "epoch": 0.35, "grad_norm": 0.4381203353404999, "learning_rate": 8.531361550229475e-05, "loss": 0.9971, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 350, "total_memory_available (GB)": 94.62 }, { "epoch": 0.36, "grad_norm": 0.4479101300239563, "learning_rate": 8.480367159612444e-05, "loss": 0.9834, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 360, "total_memory_available (GB)": 94.62 }, { "epoch": 0.37, "grad_norm": 0.44543156027793884, "learning_rate": 8.42937276899541e-05, "loss": 0.9811, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 370, "total_memory_available (GB)": 94.62 }, { "epoch": 0.38, "grad_norm": 0.46895870566368103, "learning_rate": 8.378378378378379e-05, "loss": 0.9969, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 380, "total_memory_available (GB)": 94.62 }, { "epoch": 0.39, "grad_norm": 0.42161303758621216, "learning_rate": 8.327383987761347e-05, "loss": 0.9852, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 390, "total_memory_available (GB)": 94.62 }, { "epoch": 0.4, "grad_norm": 0.4941897690296173, "learning_rate": 8.276389597144315e-05, "loss": 0.9878, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 400, "total_memory_available (GB)": 94.62 }, { "epoch": 0.41, "grad_norm": 0.4448719918727875, "learning_rate": 8.225395206527282e-05, "loss": 0.9956, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 410, "total_memory_available (GB)": 94.62 }, { "epoch": 0.42, "grad_norm": 0.4166922867298126, "learning_rate": 8.17440081591025e-05, "loss": 0.9899, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 420, "total_memory_available (GB)": 94.62 }, { "epoch": 0.43, "grad_norm": 0.40304499864578247, "learning_rate": 8.123406425293219e-05, "loss": 0.9908, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 430, "total_memory_available (GB)": 94.62 }, { "epoch": 0.43, "grad_norm": 0.43452388048171997, "learning_rate": 8.072412034676186e-05, "loss": 0.9705, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 440, "total_memory_available (GB)": 94.62 }, { "epoch": 0.44, "grad_norm": 0.4060077965259552, "learning_rate": 8.021417644059154e-05, "loss": 0.9825, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 450, "total_memory_available (GB)": 94.62 }, { "epoch": 0.45, "grad_norm": 0.4520680904388428, "learning_rate": 7.970423253442122e-05, "loss": 0.9782, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 460, "total_memory_available (GB)": 94.62 }, { "epoch": 0.46, "grad_norm": 0.4969607889652252, "learning_rate": 7.91942886282509e-05, "loss": 0.9798, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 470, "total_memory_available (GB)": 94.62 }, { "epoch": 0.47, "grad_norm": 0.48629072308540344, "learning_rate": 7.868434472208057e-05, "loss": 0.9795, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 480, "total_memory_available (GB)": 94.62 }, { "epoch": 0.48, "grad_norm": 0.4386264979839325, "learning_rate": 7.817440081591025e-05, "loss": 0.9767, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 490, "total_memory_available (GB)": 94.62 }, { "epoch": 0.49, "grad_norm": 0.44945308566093445, "learning_rate": 7.766445690973994e-05, "loss": 0.9673, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.5, "grad_norm": 0.4677426218986511, "learning_rate": 7.715451300356961e-05, "loss": 0.9752, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 510, "total_memory_available (GB)": 94.62 }, { "epoch": 0.51, "grad_norm": 0.4450967013835907, "learning_rate": 7.664456909739929e-05, "loss": 0.9708, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 520, "total_memory_available (GB)": 94.62 }, { "epoch": 0.52, "grad_norm": 0.425359308719635, "learning_rate": 7.613462519122897e-05, "loss": 0.9777, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 530, "total_memory_available (GB)": 94.62 }, { "epoch": 0.53, "grad_norm": 0.427310049533844, "learning_rate": 7.562468128505865e-05, "loss": 0.9721, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 540, "total_memory_available (GB)": 94.62 }, { "epoch": 0.54, "grad_norm": 0.4417431056499481, "learning_rate": 7.511473737888832e-05, "loss": 0.9728, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 550, "total_memory_available (GB)": 94.62 }, { "epoch": 0.55, "grad_norm": 0.4482574164867401, "learning_rate": 7.460479347271801e-05, "loss": 0.967, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 560, "total_memory_available (GB)": 94.62 }, { "epoch": 0.56, "grad_norm": 0.434935063123703, "learning_rate": 7.409484956654769e-05, "loss": 0.9715, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 570, "total_memory_available (GB)": 94.62 }, { "epoch": 0.57, "grad_norm": 0.39001864194869995, "learning_rate": 7.358490566037736e-05, "loss": 0.9793, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 580, "total_memory_available (GB)": 94.62 }, { "epoch": 0.58, "grad_norm": 0.45607784390449524, "learning_rate": 7.307496175420703e-05, "loss": 0.9665, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 590, "total_memory_available (GB)": 94.62 }, { "epoch": 0.59, "grad_norm": 0.539667010307312, "learning_rate": 7.256501784803672e-05, "loss": 0.9664, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 600, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6, "grad_norm": 0.4229834973812103, "learning_rate": 7.20550739418664e-05, "loss": 0.9674, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 610, "total_memory_available (GB)": 94.62 }, { "epoch": 0.61, "grad_norm": 0.4525637626647949, "learning_rate": 7.154513003569607e-05, "loss": 0.9671, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 620, "total_memory_available (GB)": 94.62 }, { "epoch": 0.62, "grad_norm": 0.3813334107398987, "learning_rate": 7.103518612952576e-05, "loss": 0.9683, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 630, "total_memory_available (GB)": 94.62 }, { "epoch": 0.63, "grad_norm": 0.41690248250961304, "learning_rate": 7.052524222335543e-05, "loss": 0.9652, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 640, "total_memory_available (GB)": 94.62 }, { "epoch": 0.64, "grad_norm": 0.4197711944580078, "learning_rate": 7.001529831718512e-05, "loss": 0.9776, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 650, "total_memory_available (GB)": 94.62 }, { "epoch": 0.65, "grad_norm": 0.4112277925014496, "learning_rate": 6.950535441101478e-05, "loss": 0.9627, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 660, "total_memory_available (GB)": 94.62 }, { "epoch": 0.66, "grad_norm": 0.4737640917301178, "learning_rate": 6.899541050484447e-05, "loss": 0.9586, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 670, "total_memory_available (GB)": 94.62 }, { "epoch": 0.67, "grad_norm": 0.4623068869113922, "learning_rate": 6.848546659867415e-05, "loss": 0.9637, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 680, "total_memory_available (GB)": 94.62 }, { "epoch": 0.68, "grad_norm": 0.47191059589385986, "learning_rate": 6.797552269250382e-05, "loss": 0.9791, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 690, "total_memory_available (GB)": 94.62 }, { "epoch": 0.69, "grad_norm": 0.4488503336906433, "learning_rate": 6.746557878633351e-05, "loss": 0.9654, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 700, "total_memory_available (GB)": 94.62 }, { "epoch": 0.7, "grad_norm": 0.4537357985973358, "learning_rate": 6.695563488016318e-05, "loss": 0.9652, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 710, "total_memory_available (GB)": 94.62 }, { "epoch": 0.71, "grad_norm": 0.4568102955818176, "learning_rate": 6.644569097399287e-05, "loss": 0.972, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 720, "total_memory_available (GB)": 94.62 }, { "epoch": 0.72, "grad_norm": 0.37428340315818787, "learning_rate": 6.593574706782255e-05, "loss": 0.9531, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 730, "total_memory_available (GB)": 94.62 }, { "epoch": 0.73, "grad_norm": 0.40930184721946716, "learning_rate": 6.542580316165222e-05, "loss": 0.964, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 740, "total_memory_available (GB)": 94.62 }, { "epoch": 0.74, "grad_norm": 0.419447124004364, "learning_rate": 6.491585925548191e-05, "loss": 0.9535, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 750, "total_memory_available (GB)": 94.62 }, { "epoch": 0.75, "grad_norm": 0.4166411757469177, "learning_rate": 6.440591534931157e-05, "loss": 0.9632, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 760, "total_memory_available (GB)": 94.62 }, { "epoch": 0.76, "grad_norm": 0.43183305859565735, "learning_rate": 6.389597144314126e-05, "loss": 0.9623, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 770, "total_memory_available (GB)": 94.62 }, { "epoch": 0.77, "grad_norm": 0.4423561692237854, "learning_rate": 6.338602753697093e-05, "loss": 0.9662, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 780, "total_memory_available (GB)": 94.62 }, { "epoch": 0.78, "grad_norm": 0.4051528871059418, "learning_rate": 6.287608363080062e-05, "loss": 0.9606, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 790, "total_memory_available (GB)": 94.62 }, { "epoch": 0.79, "grad_norm": 0.41725561022758484, "learning_rate": 6.23661397246303e-05, "loss": 0.9626, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 800, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8, "grad_norm": 0.4825494587421417, "learning_rate": 6.185619581845997e-05, "loss": 0.9586, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 810, "total_memory_available (GB)": 94.62 }, { "epoch": 0.81, "grad_norm": 0.3946501612663269, "learning_rate": 6.134625191228966e-05, "loss": 0.9511, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 820, "total_memory_available (GB)": 94.62 }, { "epoch": 0.82, "grad_norm": 0.4180380702018738, "learning_rate": 6.0836308006119326e-05, "loss": 0.9525, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 830, "total_memory_available (GB)": 94.62 }, { "epoch": 0.83, "grad_norm": 0.4118707478046417, "learning_rate": 6.032636409994901e-05, "loss": 0.9545, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 840, "total_memory_available (GB)": 94.62 }, { "epoch": 0.84, "grad_norm": 0.4261344075202942, "learning_rate": 5.981642019377869e-05, "loss": 0.9533, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 850, "total_memory_available (GB)": 94.62 }, { "epoch": 0.85, "grad_norm": 0.39828041195869446, "learning_rate": 5.930647628760837e-05, "loss": 0.9591, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 860, "total_memory_available (GB)": 94.62 }, { "epoch": 0.86, "grad_norm": 0.40321114659309387, "learning_rate": 5.879653238143804e-05, "loss": 0.9541, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 870, "total_memory_available (GB)": 94.62 }, { "epoch": 0.87, "grad_norm": 0.39925679564476013, "learning_rate": 5.8286588475267726e-05, "loss": 0.9518, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 880, "total_memory_available (GB)": 94.62 }, { "epoch": 0.88, "grad_norm": 0.4243835210800171, "learning_rate": 5.777664456909741e-05, "loss": 0.9438, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 890, "total_memory_available (GB)": 94.62 }, { "epoch": 0.89, "grad_norm": 0.41923442482948303, "learning_rate": 5.7266700662927075e-05, "loss": 0.9637, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 900, "total_memory_available (GB)": 94.62 }, { "epoch": 0.9, "grad_norm": 0.3989606499671936, "learning_rate": 5.6756756756756757e-05, "loss": 0.9513, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 910, "total_memory_available (GB)": 94.62 }, { "epoch": 0.91, "grad_norm": 0.4022904634475708, "learning_rate": 5.624681285058644e-05, "loss": 0.9475, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 920, "total_memory_available (GB)": 94.62 }, { "epoch": 0.92, "grad_norm": 0.4124114513397217, "learning_rate": 5.573686894441612e-05, "loss": 0.9436, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 930, "total_memory_available (GB)": 94.62 }, { "epoch": 0.93, "grad_norm": 0.4305116832256317, "learning_rate": 5.5226925038245794e-05, "loss": 0.956, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 940, "total_memory_available (GB)": 94.62 }, { "epoch": 0.94, "grad_norm": 0.37347981333732605, "learning_rate": 5.4716981132075475e-05, "loss": 0.9589, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 950, "total_memory_available (GB)": 94.62 }, { "epoch": 0.95, "grad_norm": 0.43223634362220764, "learning_rate": 5.4207037225905157e-05, "loss": 0.948, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 960, "total_memory_available (GB)": 94.62 }, { "epoch": 0.96, "grad_norm": 0.38680389523506165, "learning_rate": 5.369709331973484e-05, "loss": 0.9474, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 970, "total_memory_available (GB)": 94.62 }, { "epoch": 0.97, "grad_norm": 0.42318058013916016, "learning_rate": 5.3187149413564506e-05, "loss": 0.9474, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 980, "total_memory_available (GB)": 94.62 }, { "epoch": 0.98, "grad_norm": 0.39201366901397705, "learning_rate": 5.267720550739419e-05, "loss": 0.9438, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 990, "total_memory_available (GB)": 94.62 }, { "epoch": 0.99, "grad_norm": 0.37628376483917236, "learning_rate": 5.216726160122387e-05, "loss": 0.9441, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.0, "grad_norm": 0.43160149455070496, "learning_rate": 5.165731769505354e-05, "loss": 0.9521, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1010, "total_memory_available (GB)": 94.62 }, { "epoch": 1.01, "grad_norm": 0.4002411663532257, "learning_rate": 5.1147373788883224e-05, "loss": 0.9425, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1020, "total_memory_available (GB)": 94.62 }, { "epoch": 1.02, "grad_norm": 0.4099406599998474, "learning_rate": 5.0637429882712906e-05, "loss": 0.9522, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1030, "total_memory_available (GB)": 94.62 }, { "epoch": 1.03, "grad_norm": 0.366636723279953, "learning_rate": 5.012748597654259e-05, "loss": 0.9493, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1040, "total_memory_available (GB)": 94.62 }, { "epoch": 1.04, "grad_norm": 0.39663130044937134, "learning_rate": 4.961754207037226e-05, "loss": 0.9458, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1050, "total_memory_available (GB)": 94.62 }, { "epoch": 1.05, "grad_norm": 0.36909976601600647, "learning_rate": 4.910759816420194e-05, "loss": 0.9455, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1060, "total_memory_available (GB)": 94.62 }, { "epoch": 1.06, "grad_norm": 0.3812944293022156, "learning_rate": 4.859765425803162e-05, "loss": 0.9423, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1070, "total_memory_available (GB)": 94.62 }, { "epoch": 1.07, "grad_norm": 0.38268741965293884, "learning_rate": 4.80877103518613e-05, "loss": 0.9487, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1080, "total_memory_available (GB)": 94.62 }, { "epoch": 1.08, "grad_norm": 0.3832300007343292, "learning_rate": 4.7577766445690974e-05, "loss": 0.9455, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1090, "total_memory_available (GB)": 94.62 }, { "epoch": 1.09, "grad_norm": 0.4103021025657654, "learning_rate": 4.7067822539520655e-05, "loss": 0.9353, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1100, "total_memory_available (GB)": 94.62 }, { "epoch": 1.1, "grad_norm": 0.3691622316837311, "learning_rate": 4.655787863335033e-05, "loss": 0.9514, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1110, "total_memory_available (GB)": 94.62 }, { "epoch": 1.11, "grad_norm": 0.42021074891090393, "learning_rate": 4.604793472718002e-05, "loss": 0.9469, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1120, "total_memory_available (GB)": 94.62 }, { "epoch": 1.12, "grad_norm": 0.4154146909713745, "learning_rate": 4.553799082100969e-05, "loss": 0.9367, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1130, "total_memory_available (GB)": 94.62 }, { "epoch": 1.13, "grad_norm": 0.41266191005706787, "learning_rate": 4.5028046914839374e-05, "loss": 0.9333, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1140, "total_memory_available (GB)": 94.62 }, { "epoch": 1.14, "grad_norm": 0.4136646091938019, "learning_rate": 4.451810300866905e-05, "loss": 0.9357, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1150, "total_memory_available (GB)": 94.62 }, { "epoch": 1.15, "grad_norm": 0.3877258002758026, "learning_rate": 4.400815910249872e-05, "loss": 0.9386, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1160, "total_memory_available (GB)": 94.62 }, { "epoch": 1.16, "grad_norm": 0.3618062734603882, "learning_rate": 4.3498215196328404e-05, "loss": 0.953, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1170, "total_memory_available (GB)": 94.62 }, { "epoch": 1.17, "grad_norm": 0.41884803771972656, "learning_rate": 4.2988271290158086e-05, "loss": 0.9491, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1180, "total_memory_available (GB)": 94.62 }, { "epoch": 1.18, "grad_norm": 0.4496184289455414, "learning_rate": 4.247832738398777e-05, "loss": 0.9343, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1190, "total_memory_available (GB)": 94.62 }, { "epoch": 1.19, "grad_norm": 0.39843064546585083, "learning_rate": 4.196838347781744e-05, "loss": 0.9486, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1200, "total_memory_available (GB)": 94.62 }, { "epoch": 1.2, "grad_norm": 0.3992258608341217, "learning_rate": 4.145843957164712e-05, "loss": 0.9461, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1210, "total_memory_available (GB)": 94.62 }, { "epoch": 1.21, "grad_norm": 0.4257832169532776, "learning_rate": 4.09484956654768e-05, "loss": 0.9427, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1220, "total_memory_available (GB)": 94.62 }, { "epoch": 1.22, "grad_norm": 0.4028872847557068, "learning_rate": 4.043855175930648e-05, "loss": 0.9443, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1230, "total_memory_available (GB)": 94.62 }, { "epoch": 1.23, "grad_norm": 0.3647397458553314, "learning_rate": 3.992860785313616e-05, "loss": 0.9482, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1240, "total_memory_available (GB)": 94.62 }, { "epoch": 1.24, "grad_norm": 0.4042160212993622, "learning_rate": 3.9418663946965835e-05, "loss": 0.9424, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1250, "total_memory_available (GB)": 94.62 }, { "epoch": 1.25, "grad_norm": 0.4318855404853821, "learning_rate": 3.8908720040795516e-05, "loss": 0.9392, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1260, "total_memory_available (GB)": 94.62 }, { "epoch": 1.26, "grad_norm": 0.3970791697502136, "learning_rate": 3.839877613462519e-05, "loss": 0.9369, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1270, "total_memory_available (GB)": 94.62 }, { "epoch": 1.27, "grad_norm": 0.40742847323417664, "learning_rate": 3.788883222845487e-05, "loss": 0.953, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1280, "total_memory_available (GB)": 94.62 }, { "epoch": 1.28, "grad_norm": 0.47150084376335144, "learning_rate": 3.737888832228455e-05, "loss": 0.9375, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1290, "total_memory_available (GB)": 94.62 }, { "epoch": 1.28, "grad_norm": 0.3862932324409485, "learning_rate": 3.6868944416114235e-05, "loss": 0.9347, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1300, "total_memory_available (GB)": 94.62 }, { "epoch": 1.29, "grad_norm": 0.3804072141647339, "learning_rate": 3.635900050994391e-05, "loss": 0.9504, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1310, "total_memory_available (GB)": 94.62 }, { "epoch": 1.3, "grad_norm": 0.3536450266838074, "learning_rate": 3.5849056603773584e-05, "loss": 0.9334, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1320, "total_memory_available (GB)": 94.62 }, { "epoch": 1.31, "grad_norm": 0.4137759506702423, "learning_rate": 3.5339112697603265e-05, "loss": 0.9424, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1330, "total_memory_available (GB)": 94.62 }, { "epoch": 1.32, "grad_norm": 0.37751680612564087, "learning_rate": 3.482916879143294e-05, "loss": 0.9355, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1340, "total_memory_available (GB)": 94.62 }, { "epoch": 1.33, "grad_norm": 0.3878466784954071, "learning_rate": 3.431922488526262e-05, "loss": 0.9404, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1350, "total_memory_available (GB)": 94.62 }, { "epoch": 1.34, "grad_norm": 0.3671165406703949, "learning_rate": 3.38092809790923e-05, "loss": 0.9448, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1360, "total_memory_available (GB)": 94.62 }, { "epoch": 1.35, "grad_norm": 0.3908054828643799, "learning_rate": 3.3299337072921984e-05, "loss": 0.9456, "max_memory_allocated (GB)": 91.96, "memory_allocated (GB)": 14.99, "step": 1370, "total_memory_available (GB)": 94.62 }, { "epoch": 1.36, "grad_norm": 0.3817441463470459, "learning_rate": 3.278939316675166e-05, "loss": 0.9457, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1380, "total_memory_available (GB)": 94.62 }, { "epoch": 1.37, "grad_norm": 0.3505423069000244, "learning_rate": 3.227944926058134e-05, "loss": 0.9355, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1390, "total_memory_available (GB)": 94.62 }, { "epoch": 1.38, "grad_norm": 0.37634551525115967, "learning_rate": 3.1769505354411015e-05, "loss": 0.9271, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1400, "total_memory_available (GB)": 94.62 }, { "epoch": 1.39, "grad_norm": 0.3707720935344696, "learning_rate": 3.1259561448240696e-05, "loss": 0.9189, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1410, "total_memory_available (GB)": 94.62 }, { "epoch": 1.4, "grad_norm": 0.36191660165786743, "learning_rate": 3.074961754207038e-05, "loss": 0.9334, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1420, "total_memory_available (GB)": 94.62 }, { "epoch": 1.41, "grad_norm": 0.3946132957935333, "learning_rate": 3.0239673635900052e-05, "loss": 0.9423, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1430, "total_memory_available (GB)": 94.62 }, { "epoch": 1.42, "grad_norm": 0.38084161281585693, "learning_rate": 2.9729729729729733e-05, "loss": 0.9311, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1440, "total_memory_available (GB)": 94.62 }, { "epoch": 1.43, "grad_norm": 0.3604285717010498, "learning_rate": 2.9219785823559408e-05, "loss": 0.9219, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1450, "total_memory_available (GB)": 94.62 }, { "epoch": 1.44, "grad_norm": 0.3744727373123169, "learning_rate": 2.8709841917389093e-05, "loss": 0.9326, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1460, "total_memory_available (GB)": 94.62 }, { "epoch": 1.45, "grad_norm": 0.38844382762908936, "learning_rate": 2.8199898011218767e-05, "loss": 0.9332, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1470, "total_memory_available (GB)": 94.62 }, { "epoch": 1.46, "grad_norm": 0.39416611194610596, "learning_rate": 2.7689954105048445e-05, "loss": 0.9415, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1480, "total_memory_available (GB)": 94.62 }, { "epoch": 1.47, "grad_norm": 0.35142263770103455, "learning_rate": 2.7180010198878126e-05, "loss": 0.9227, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1490, "total_memory_available (GB)": 94.62 }, { "epoch": 1.48, "grad_norm": 0.3765707314014435, "learning_rate": 2.66700662927078e-05, "loss": 0.9316, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1500, "total_memory_available (GB)": 94.62 } ], "logging_steps": 10, "max_steps": 2022, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "total_flos": 1.6792232923653734e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }