minmingzhu02's picture
Upload folder using huggingface_hub
3c186aa verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9767729182110205,
"eval_steps": 500,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 1.339092493057251,
"learning_rate": 5.6012058970266934e-05,
"loss": 1.6822,
"max_memory_allocated (GB)": 91.88,
"memory_allocated (GB)": 14.99,
"step": 10,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.02,
"grad_norm": 1.4976561069488525,
"learning_rate": 7.287336883921704e-05,
"loss": 1.3895,
"max_memory_allocated (GB)": 91.9,
"memory_allocated (GB)": 14.99,
"step": 20,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.03,
"grad_norm": 0.5813677310943604,
"learning_rate": 8.273660282559241e-05,
"loss": 1.2399,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 30,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.04,
"grad_norm": 0.3463669717311859,
"learning_rate": 8.973467870816715e-05,
"loss": 1.2044,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 40,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.05,
"grad_norm": 0.3429594039916992,
"learning_rate": 9.516280807158375e-05,
"loss": 1.1798,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 50,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.06,
"grad_norm": 0.3272635042667389,
"learning_rate": 9.959791269454252e-05,
"loss": 1.153,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 60,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.07,
"grad_norm": 0.37938210368156433,
"learning_rate": 9.959204487506375e-05,
"loss": 1.1266,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 70,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.08,
"grad_norm": 0.4513859152793884,
"learning_rate": 9.908210096889343e-05,
"loss": 1.1218,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 80,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.09,
"grad_norm": 0.5865214467048645,
"learning_rate": 9.85721570627231e-05,
"loss": 1.1048,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 90,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.1,
"grad_norm": 0.6466606855392456,
"learning_rate": 9.806221315655279e-05,
"loss": 1.1064,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 100,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.11,
"grad_norm": 0.5907604098320007,
"learning_rate": 9.755226925038246e-05,
"loss": 1.0716,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 110,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.12,
"grad_norm": 0.7884001135826111,
"learning_rate": 9.704232534421214e-05,
"loss": 1.0656,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 120,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.13,
"grad_norm": 0.7752586007118225,
"learning_rate": 9.653238143804181e-05,
"loss": 1.065,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 130,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.14,
"grad_norm": 0.5188919901847839,
"learning_rate": 9.60224375318715e-05,
"loss": 1.0606,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 140,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.15,
"grad_norm": 0.5289068818092346,
"learning_rate": 9.551249362570118e-05,
"loss": 1.0537,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 150,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.16,
"grad_norm": 1.0006146430969238,
"learning_rate": 9.500254971953085e-05,
"loss": 1.0528,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 160,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.17,
"grad_norm": 0.5318694710731506,
"learning_rate": 9.449260581336054e-05,
"loss": 1.0357,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 170,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.18,
"grad_norm": 0.5409672260284424,
"learning_rate": 9.398266190719021e-05,
"loss": 1.0264,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 180,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.19,
"grad_norm": 0.5338054299354553,
"learning_rate": 9.347271800101989e-05,
"loss": 1.0319,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 190,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.2,
"grad_norm": 0.5304291844367981,
"learning_rate": 9.296277409484956e-05,
"loss": 1.0301,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 200,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.21,
"grad_norm": 0.5799819231033325,
"learning_rate": 9.245283018867925e-05,
"loss": 1.0179,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 210,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.22,
"grad_norm": 0.4919432997703552,
"learning_rate": 9.194288628250894e-05,
"loss": 1.0174,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 220,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.23,
"grad_norm": 0.5090098977088928,
"learning_rate": 9.14329423763386e-05,
"loss": 1.0261,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 230,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.24,
"grad_norm": 0.5532674193382263,
"learning_rate": 9.092299847016829e-05,
"loss": 1.0239,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 240,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.25,
"grad_norm": 0.5546780824661255,
"learning_rate": 9.041305456399796e-05,
"loss": 1.0072,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 250,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.26,
"grad_norm": 0.5483475923538208,
"learning_rate": 8.990311065782764e-05,
"loss": 1.0121,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 260,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.27,
"grad_norm": 0.4962722063064575,
"learning_rate": 8.939316675165733e-05,
"loss": 1.0097,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 270,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.28,
"grad_norm": 0.5032678842544556,
"learning_rate": 8.8883222845487e-05,
"loss": 1.0085,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 280,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.29,
"grad_norm": 0.48048779368400574,
"learning_rate": 8.837327893931669e-05,
"loss": 1.006,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 290,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.3,
"grad_norm": 0.493956595659256,
"learning_rate": 8.786333503314635e-05,
"loss": 0.9991,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 300,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.31,
"grad_norm": 0.4832962155342102,
"learning_rate": 8.735339112697604e-05,
"loss": 0.9994,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 310,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.32,
"grad_norm": 0.44359833002090454,
"learning_rate": 8.684344722080571e-05,
"loss": 0.9949,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 320,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.33,
"grad_norm": 0.432824045419693,
"learning_rate": 8.633350331463539e-05,
"loss": 0.9945,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 330,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.34,
"grad_norm": 0.5194958448410034,
"learning_rate": 8.582355940846507e-05,
"loss": 1.0005,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 340,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.35,
"grad_norm": 0.4381203353404999,
"learning_rate": 8.531361550229475e-05,
"loss": 0.9971,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 350,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.36,
"grad_norm": 0.4479101300239563,
"learning_rate": 8.480367159612444e-05,
"loss": 0.9834,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 360,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.37,
"grad_norm": 0.44543156027793884,
"learning_rate": 8.42937276899541e-05,
"loss": 0.9811,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 370,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.38,
"grad_norm": 0.46895870566368103,
"learning_rate": 8.378378378378379e-05,
"loss": 0.9969,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 380,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.39,
"grad_norm": 0.42161303758621216,
"learning_rate": 8.327383987761347e-05,
"loss": 0.9852,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 390,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.4,
"grad_norm": 0.4941897690296173,
"learning_rate": 8.276389597144315e-05,
"loss": 0.9878,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 400,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.41,
"grad_norm": 0.4448719918727875,
"learning_rate": 8.225395206527282e-05,
"loss": 0.9956,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 410,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.42,
"grad_norm": 0.4166922867298126,
"learning_rate": 8.17440081591025e-05,
"loss": 0.9899,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 420,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.43,
"grad_norm": 0.40304499864578247,
"learning_rate": 8.123406425293219e-05,
"loss": 0.9908,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 430,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.43,
"grad_norm": 0.43452388048171997,
"learning_rate": 8.072412034676186e-05,
"loss": 0.9705,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 440,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.44,
"grad_norm": 0.4060077965259552,
"learning_rate": 8.021417644059154e-05,
"loss": 0.9825,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 450,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.45,
"grad_norm": 0.4520680904388428,
"learning_rate": 7.970423253442122e-05,
"loss": 0.9782,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 460,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.46,
"grad_norm": 0.4969607889652252,
"learning_rate": 7.91942886282509e-05,
"loss": 0.9798,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 470,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.47,
"grad_norm": 0.48629072308540344,
"learning_rate": 7.868434472208057e-05,
"loss": 0.9795,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 480,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.48,
"grad_norm": 0.4386264979839325,
"learning_rate": 7.817440081591025e-05,
"loss": 0.9767,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 490,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.49,
"grad_norm": 0.44945308566093445,
"learning_rate": 7.766445690973994e-05,
"loss": 0.9673,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.5,
"grad_norm": 0.4677426218986511,
"learning_rate": 7.715451300356961e-05,
"loss": 0.9752,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 510,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.51,
"grad_norm": 0.4450967013835907,
"learning_rate": 7.664456909739929e-05,
"loss": 0.9708,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 520,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.52,
"grad_norm": 0.425359308719635,
"learning_rate": 7.613462519122897e-05,
"loss": 0.9777,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 530,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.53,
"grad_norm": 0.427310049533844,
"learning_rate": 7.562468128505865e-05,
"loss": 0.9721,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 540,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.54,
"grad_norm": 0.4417431056499481,
"learning_rate": 7.511473737888832e-05,
"loss": 0.9728,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 550,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.55,
"grad_norm": 0.4482574164867401,
"learning_rate": 7.460479347271801e-05,
"loss": 0.967,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 560,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.56,
"grad_norm": 0.434935063123703,
"learning_rate": 7.409484956654769e-05,
"loss": 0.9715,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 570,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.57,
"grad_norm": 0.39001864194869995,
"learning_rate": 7.358490566037736e-05,
"loss": 0.9793,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 580,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.58,
"grad_norm": 0.45607784390449524,
"learning_rate": 7.307496175420703e-05,
"loss": 0.9665,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 590,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.59,
"grad_norm": 0.539667010307312,
"learning_rate": 7.256501784803672e-05,
"loss": 0.9664,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 600,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.6,
"grad_norm": 0.4229834973812103,
"learning_rate": 7.20550739418664e-05,
"loss": 0.9674,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 610,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.61,
"grad_norm": 0.4525637626647949,
"learning_rate": 7.154513003569607e-05,
"loss": 0.9671,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 620,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.62,
"grad_norm": 0.3813334107398987,
"learning_rate": 7.103518612952576e-05,
"loss": 0.9683,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 630,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.63,
"grad_norm": 0.41690248250961304,
"learning_rate": 7.052524222335543e-05,
"loss": 0.9652,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 640,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.64,
"grad_norm": 0.4197711944580078,
"learning_rate": 7.001529831718512e-05,
"loss": 0.9776,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 650,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.65,
"grad_norm": 0.4112277925014496,
"learning_rate": 6.950535441101478e-05,
"loss": 0.9627,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 660,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.66,
"grad_norm": 0.4737640917301178,
"learning_rate": 6.899541050484447e-05,
"loss": 0.9586,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 670,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.67,
"grad_norm": 0.4623068869113922,
"learning_rate": 6.848546659867415e-05,
"loss": 0.9637,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 680,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.68,
"grad_norm": 0.47191059589385986,
"learning_rate": 6.797552269250382e-05,
"loss": 0.9791,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 690,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.69,
"grad_norm": 0.4488503336906433,
"learning_rate": 6.746557878633351e-05,
"loss": 0.9654,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 700,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.7,
"grad_norm": 0.4537357985973358,
"learning_rate": 6.695563488016318e-05,
"loss": 0.9652,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 710,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.71,
"grad_norm": 0.4568102955818176,
"learning_rate": 6.644569097399287e-05,
"loss": 0.972,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 720,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.72,
"grad_norm": 0.37428340315818787,
"learning_rate": 6.593574706782255e-05,
"loss": 0.9531,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 730,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.73,
"grad_norm": 0.40930184721946716,
"learning_rate": 6.542580316165222e-05,
"loss": 0.964,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 740,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.74,
"grad_norm": 0.419447124004364,
"learning_rate": 6.491585925548191e-05,
"loss": 0.9535,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 750,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.75,
"grad_norm": 0.4166411757469177,
"learning_rate": 6.440591534931157e-05,
"loss": 0.9632,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 760,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.76,
"grad_norm": 0.43183305859565735,
"learning_rate": 6.389597144314126e-05,
"loss": 0.9623,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 770,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.77,
"grad_norm": 0.4423561692237854,
"learning_rate": 6.338602753697093e-05,
"loss": 0.9662,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 780,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.78,
"grad_norm": 0.4051528871059418,
"learning_rate": 6.287608363080062e-05,
"loss": 0.9606,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 790,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.79,
"grad_norm": 0.41725561022758484,
"learning_rate": 6.23661397246303e-05,
"loss": 0.9626,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 800,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.8,
"grad_norm": 0.4825494587421417,
"learning_rate": 6.185619581845997e-05,
"loss": 0.9586,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 810,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.81,
"grad_norm": 0.3946501612663269,
"learning_rate": 6.134625191228966e-05,
"loss": 0.9511,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 820,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.82,
"grad_norm": 0.4180380702018738,
"learning_rate": 6.0836308006119326e-05,
"loss": 0.9525,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 830,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.83,
"grad_norm": 0.4118707478046417,
"learning_rate": 6.032636409994901e-05,
"loss": 0.9545,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 840,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.84,
"grad_norm": 0.4261344075202942,
"learning_rate": 5.981642019377869e-05,
"loss": 0.9533,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 850,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.85,
"grad_norm": 0.39828041195869446,
"learning_rate": 5.930647628760837e-05,
"loss": 0.9591,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 860,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.86,
"grad_norm": 0.40321114659309387,
"learning_rate": 5.879653238143804e-05,
"loss": 0.9541,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 870,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.87,
"grad_norm": 0.39925679564476013,
"learning_rate": 5.8286588475267726e-05,
"loss": 0.9518,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 880,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.88,
"grad_norm": 0.4243835210800171,
"learning_rate": 5.777664456909741e-05,
"loss": 0.9438,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 890,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.89,
"grad_norm": 0.41923442482948303,
"learning_rate": 5.7266700662927075e-05,
"loss": 0.9637,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 900,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.9,
"grad_norm": 0.3989606499671936,
"learning_rate": 5.6756756756756757e-05,
"loss": 0.9513,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 910,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.91,
"grad_norm": 0.4022904634475708,
"learning_rate": 5.624681285058644e-05,
"loss": 0.9475,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 920,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.92,
"grad_norm": 0.4124114513397217,
"learning_rate": 5.573686894441612e-05,
"loss": 0.9436,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 930,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.93,
"grad_norm": 0.4305116832256317,
"learning_rate": 5.5226925038245794e-05,
"loss": 0.956,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 940,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.94,
"grad_norm": 0.37347981333732605,
"learning_rate": 5.4716981132075475e-05,
"loss": 0.9589,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 950,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.95,
"grad_norm": 0.43223634362220764,
"learning_rate": 5.4207037225905157e-05,
"loss": 0.948,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 960,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.96,
"grad_norm": 0.38680389523506165,
"learning_rate": 5.369709331973484e-05,
"loss": 0.9474,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 970,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.97,
"grad_norm": 0.42318058013916016,
"learning_rate": 5.3187149413564506e-05,
"loss": 0.9474,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 980,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.98,
"grad_norm": 0.39201366901397705,
"learning_rate": 5.267720550739419e-05,
"loss": 0.9438,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 990,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.99,
"grad_norm": 0.37628376483917236,
"learning_rate": 5.216726160122387e-05,
"loss": 0.9441,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.0,
"grad_norm": 0.43160149455070496,
"learning_rate": 5.165731769505354e-05,
"loss": 0.9521,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1010,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.01,
"grad_norm": 0.4002411663532257,
"learning_rate": 5.1147373788883224e-05,
"loss": 0.9425,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1020,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.02,
"grad_norm": 0.4099406599998474,
"learning_rate": 5.0637429882712906e-05,
"loss": 0.9522,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1030,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.03,
"grad_norm": 0.366636723279953,
"learning_rate": 5.012748597654259e-05,
"loss": 0.9493,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1040,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.04,
"grad_norm": 0.39663130044937134,
"learning_rate": 4.961754207037226e-05,
"loss": 0.9458,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1050,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.05,
"grad_norm": 0.36909976601600647,
"learning_rate": 4.910759816420194e-05,
"loss": 0.9455,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1060,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.06,
"grad_norm": 0.3812944293022156,
"learning_rate": 4.859765425803162e-05,
"loss": 0.9423,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1070,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.07,
"grad_norm": 0.38268741965293884,
"learning_rate": 4.80877103518613e-05,
"loss": 0.9487,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1080,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.08,
"grad_norm": 0.3832300007343292,
"learning_rate": 4.7577766445690974e-05,
"loss": 0.9455,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1090,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.09,
"grad_norm": 0.4103021025657654,
"learning_rate": 4.7067822539520655e-05,
"loss": 0.9353,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1100,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.1,
"grad_norm": 0.3691622316837311,
"learning_rate": 4.655787863335033e-05,
"loss": 0.9514,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1110,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.11,
"grad_norm": 0.42021074891090393,
"learning_rate": 4.604793472718002e-05,
"loss": 0.9469,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1120,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.12,
"grad_norm": 0.4154146909713745,
"learning_rate": 4.553799082100969e-05,
"loss": 0.9367,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1130,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.13,
"grad_norm": 0.41266191005706787,
"learning_rate": 4.5028046914839374e-05,
"loss": 0.9333,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1140,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.14,
"grad_norm": 0.4136646091938019,
"learning_rate": 4.451810300866905e-05,
"loss": 0.9357,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1150,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.15,
"grad_norm": 0.3877258002758026,
"learning_rate": 4.400815910249872e-05,
"loss": 0.9386,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1160,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.16,
"grad_norm": 0.3618062734603882,
"learning_rate": 4.3498215196328404e-05,
"loss": 0.953,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1170,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.17,
"grad_norm": 0.41884803771972656,
"learning_rate": 4.2988271290158086e-05,
"loss": 0.9491,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1180,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.18,
"grad_norm": 0.4496184289455414,
"learning_rate": 4.247832738398777e-05,
"loss": 0.9343,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1190,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.19,
"grad_norm": 0.39843064546585083,
"learning_rate": 4.196838347781744e-05,
"loss": 0.9486,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1200,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.2,
"grad_norm": 0.3992258608341217,
"learning_rate": 4.145843957164712e-05,
"loss": 0.9461,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1210,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.21,
"grad_norm": 0.4257832169532776,
"learning_rate": 4.09484956654768e-05,
"loss": 0.9427,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1220,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.22,
"grad_norm": 0.4028872847557068,
"learning_rate": 4.043855175930648e-05,
"loss": 0.9443,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1230,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.23,
"grad_norm": 0.3647397458553314,
"learning_rate": 3.992860785313616e-05,
"loss": 0.9482,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1240,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.24,
"grad_norm": 0.4042160212993622,
"learning_rate": 3.9418663946965835e-05,
"loss": 0.9424,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1250,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.25,
"grad_norm": 0.4318855404853821,
"learning_rate": 3.8908720040795516e-05,
"loss": 0.9392,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1260,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.26,
"grad_norm": 0.3970791697502136,
"learning_rate": 3.839877613462519e-05,
"loss": 0.9369,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1270,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.27,
"grad_norm": 0.40742847323417664,
"learning_rate": 3.788883222845487e-05,
"loss": 0.953,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1280,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.28,
"grad_norm": 0.47150084376335144,
"learning_rate": 3.737888832228455e-05,
"loss": 0.9375,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1290,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.28,
"grad_norm": 0.3862932324409485,
"learning_rate": 3.6868944416114235e-05,
"loss": 0.9347,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1300,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.29,
"grad_norm": 0.3804072141647339,
"learning_rate": 3.635900050994391e-05,
"loss": 0.9504,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1310,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.3,
"grad_norm": 0.3536450266838074,
"learning_rate": 3.5849056603773584e-05,
"loss": 0.9334,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1320,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.31,
"grad_norm": 0.4137759506702423,
"learning_rate": 3.5339112697603265e-05,
"loss": 0.9424,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1330,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.32,
"grad_norm": 0.37751680612564087,
"learning_rate": 3.482916879143294e-05,
"loss": 0.9355,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1340,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.33,
"grad_norm": 0.3878466784954071,
"learning_rate": 3.431922488526262e-05,
"loss": 0.9404,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1350,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.34,
"grad_norm": 0.3671165406703949,
"learning_rate": 3.38092809790923e-05,
"loss": 0.9448,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1360,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.35,
"grad_norm": 0.3908054828643799,
"learning_rate": 3.3299337072921984e-05,
"loss": 0.9456,
"max_memory_allocated (GB)": 91.96,
"memory_allocated (GB)": 14.99,
"step": 1370,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.36,
"grad_norm": 0.3817441463470459,
"learning_rate": 3.278939316675166e-05,
"loss": 0.9457,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1380,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.37,
"grad_norm": 0.3505423069000244,
"learning_rate": 3.227944926058134e-05,
"loss": 0.9355,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1390,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.38,
"grad_norm": 0.37634551525115967,
"learning_rate": 3.1769505354411015e-05,
"loss": 0.9271,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1400,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.39,
"grad_norm": 0.3707720935344696,
"learning_rate": 3.1259561448240696e-05,
"loss": 0.9189,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1410,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.4,
"grad_norm": 0.36191660165786743,
"learning_rate": 3.074961754207038e-05,
"loss": 0.9334,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1420,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.41,
"grad_norm": 0.3946132957935333,
"learning_rate": 3.0239673635900052e-05,
"loss": 0.9423,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1430,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.42,
"grad_norm": 0.38084161281585693,
"learning_rate": 2.9729729729729733e-05,
"loss": 0.9311,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1440,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.43,
"grad_norm": 0.3604285717010498,
"learning_rate": 2.9219785823559408e-05,
"loss": 0.9219,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1450,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.44,
"grad_norm": 0.3744727373123169,
"learning_rate": 2.8709841917389093e-05,
"loss": 0.9326,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1460,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.45,
"grad_norm": 0.38844382762908936,
"learning_rate": 2.8199898011218767e-05,
"loss": 0.9332,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1470,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.46,
"grad_norm": 0.39416611194610596,
"learning_rate": 2.7689954105048445e-05,
"loss": 0.9415,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1480,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.47,
"grad_norm": 0.35142263770103455,
"learning_rate": 2.7180010198878126e-05,
"loss": 0.9227,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1490,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.48,
"grad_norm": 0.3765707314014435,
"learning_rate": 2.66700662927078e-05,
"loss": 0.9316,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.49,
"grad_norm": 0.3772505521774292,
"learning_rate": 2.6160122386537482e-05,
"loss": 0.944,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1510,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.5,
"grad_norm": 0.36364176869392395,
"learning_rate": 2.565017848036716e-05,
"loss": 0.9365,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1520,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.51,
"grad_norm": 0.4053816795349121,
"learning_rate": 2.5140234574196842e-05,
"loss": 0.9345,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1530,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.52,
"grad_norm": 0.3838883936405182,
"learning_rate": 2.4630290668026516e-05,
"loss": 0.927,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1540,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.53,
"grad_norm": 0.3805199861526489,
"learning_rate": 2.4120346761856198e-05,
"loss": 0.9301,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1550,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.54,
"grad_norm": 0.36040347814559937,
"learning_rate": 2.3610402855685876e-05,
"loss": 0.9489,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1560,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.55,
"grad_norm": 0.35256314277648926,
"learning_rate": 2.3100458949515554e-05,
"loss": 0.9327,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1570,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.56,
"grad_norm": 0.3900443911552429,
"learning_rate": 2.2590515043345235e-05,
"loss": 0.9378,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1580,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.57,
"grad_norm": 0.38479581475257874,
"learning_rate": 2.2080571137174913e-05,
"loss": 0.9244,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1590,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.58,
"grad_norm": 0.375758558511734,
"learning_rate": 2.157062723100459e-05,
"loss": 0.9348,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1600,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.59,
"grad_norm": 0.3782336413860321,
"learning_rate": 2.106068332483427e-05,
"loss": 0.9289,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1610,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.6,
"grad_norm": 0.3771725296974182,
"learning_rate": 2.0550739418663947e-05,
"loss": 0.9457,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1620,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.61,
"grad_norm": 0.378215491771698,
"learning_rate": 2.0040795512493625e-05,
"loss": 0.9326,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1630,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.62,
"grad_norm": 0.40586233139038086,
"learning_rate": 1.9530851606323306e-05,
"loss": 0.9244,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1640,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.63,
"grad_norm": 0.37859204411506653,
"learning_rate": 1.9020907700152984e-05,
"loss": 0.9304,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1650,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.64,
"grad_norm": 0.3941766619682312,
"learning_rate": 1.8510963793982662e-05,
"loss": 0.9369,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1660,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.65,
"grad_norm": 0.3579218089580536,
"learning_rate": 1.8001019887812343e-05,
"loss": 0.9351,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1670,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.66,
"grad_norm": 0.3728456199169159,
"learning_rate": 1.7491075981642018e-05,
"loss": 0.9248,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1680,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.67,
"grad_norm": 0.39080294966697693,
"learning_rate": 1.69811320754717e-05,
"loss": 0.9437,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1690,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.68,
"grad_norm": 0.38086754083633423,
"learning_rate": 1.6471188169301377e-05,
"loss": 0.9371,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1700,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.69,
"grad_norm": 0.37228095531463623,
"learning_rate": 1.5961244263131055e-05,
"loss": 0.934,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1710,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.7,
"grad_norm": 0.37343358993530273,
"learning_rate": 1.5451300356960737e-05,
"loss": 0.9348,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1720,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.71,
"grad_norm": 0.39512866735458374,
"learning_rate": 1.4941356450790415e-05,
"loss": 0.9378,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1730,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.72,
"grad_norm": 0.36341536045074463,
"learning_rate": 1.4431412544620093e-05,
"loss": 0.9203,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1740,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.73,
"grad_norm": 0.36111101508140564,
"learning_rate": 1.3921468638449772e-05,
"loss": 0.9311,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1750,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.74,
"grad_norm": 0.34905844926834106,
"learning_rate": 1.3411524732279452e-05,
"loss": 0.9366,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1760,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.75,
"grad_norm": 0.3797436058521271,
"learning_rate": 1.2901580826109128e-05,
"loss": 0.9294,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1770,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.76,
"grad_norm": 0.38309234380722046,
"learning_rate": 1.2391636919938808e-05,
"loss": 0.9372,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1780,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.77,
"grad_norm": 0.3702026605606079,
"learning_rate": 1.1881693013768486e-05,
"loss": 0.9225,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1790,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.78,
"grad_norm": 0.36598923802375793,
"learning_rate": 1.1371749107598164e-05,
"loss": 0.9242,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1800,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.79,
"grad_norm": 0.40574029088020325,
"learning_rate": 1.0861805201427844e-05,
"loss": 0.9339,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1810,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.8,
"grad_norm": 0.35166865587234497,
"learning_rate": 1.0351861295257523e-05,
"loss": 0.9262,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1820,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.81,
"grad_norm": 0.3604579567909241,
"learning_rate": 9.841917389087201e-06,
"loss": 0.9382,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1830,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.82,
"grad_norm": 0.3669303059577942,
"learning_rate": 9.33197348291688e-06,
"loss": 0.9334,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1840,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.83,
"grad_norm": 0.36166125535964966,
"learning_rate": 8.822029576746559e-06,
"loss": 0.9312,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1850,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.84,
"grad_norm": 0.34307852387428284,
"learning_rate": 8.312085670576237e-06,
"loss": 0.9336,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1860,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.85,
"grad_norm": 0.3532688021659851,
"learning_rate": 7.802141764405915e-06,
"loss": 0.9259,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1870,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.86,
"grad_norm": 0.3652610182762146,
"learning_rate": 7.2921978582355945e-06,
"loss": 0.9194,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1880,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.87,
"grad_norm": 0.34755492210388184,
"learning_rate": 6.782253952065273e-06,
"loss": 0.9336,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1890,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.88,
"grad_norm": 0.3535468876361847,
"learning_rate": 6.272310045894952e-06,
"loss": 0.9324,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1900,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.89,
"grad_norm": 0.3971847891807556,
"learning_rate": 5.762366139724631e-06,
"loss": 0.9365,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1910,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.9,
"grad_norm": 0.3563123643398285,
"learning_rate": 5.252422233554309e-06,
"loss": 0.9358,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1920,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.91,
"grad_norm": 0.34811973571777344,
"learning_rate": 4.742478327383989e-06,
"loss": 0.933,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1930,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.92,
"grad_norm": 0.36352258920669556,
"learning_rate": 4.2325344212136666e-06,
"loss": 0.9298,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1940,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.93,
"grad_norm": 0.3368126153945923,
"learning_rate": 3.7225905150433454e-06,
"loss": 0.9319,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1950,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.94,
"grad_norm": 0.3323891758918762,
"learning_rate": 3.2126466088730238e-06,
"loss": 0.9384,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1960,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.95,
"grad_norm": 0.3528982698917389,
"learning_rate": 2.702702702702703e-06,
"loss": 0.9277,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1970,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.96,
"grad_norm": 0.36531731486320496,
"learning_rate": 2.192758796532382e-06,
"loss": 0.921,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1980,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.97,
"grad_norm": 0.3461114168167114,
"learning_rate": 1.6828148903620604e-06,
"loss": 0.9348,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 1990,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.98,
"grad_norm": 0.36876726150512695,
"learning_rate": 1.1728709841917388e-06,
"loss": 0.9268,
"max_memory_allocated (GB)": 91.97,
"memory_allocated (GB)": 14.99,
"step": 2000,
"total_memory_available (GB)": 94.62
}
],
"logging_steps": 10,
"max_steps": 2022,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"total_flos": 2.238964389820498e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}