|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9767729182110205, |
|
"eval_steps": 500, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.339092493057251, |
|
"learning_rate": 5.6012058970266934e-05, |
|
"loss": 1.6822, |
|
"max_memory_allocated (GB)": 91.88, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 10, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.4976561069488525, |
|
"learning_rate": 7.287336883921704e-05, |
|
"loss": 1.3895, |
|
"max_memory_allocated (GB)": 91.9, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 20, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.5813677310943604, |
|
"learning_rate": 8.273660282559241e-05, |
|
"loss": 1.2399, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 30, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.3463669717311859, |
|
"learning_rate": 8.973467870816715e-05, |
|
"loss": 1.2044, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 40, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.3429594039916992, |
|
"learning_rate": 9.516280807158375e-05, |
|
"loss": 1.1798, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 50, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.3272635042667389, |
|
"learning_rate": 9.959791269454252e-05, |
|
"loss": 1.153, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 60, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.37938210368156433, |
|
"learning_rate": 9.959204487506375e-05, |
|
"loss": 1.1266, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 70, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.4513859152793884, |
|
"learning_rate": 9.908210096889343e-05, |
|
"loss": 1.1218, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 80, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.5865214467048645, |
|
"learning_rate": 9.85721570627231e-05, |
|
"loss": 1.1048, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 90, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.6466606855392456, |
|
"learning_rate": 9.806221315655279e-05, |
|
"loss": 1.1064, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 100, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.5907604098320007, |
|
"learning_rate": 9.755226925038246e-05, |
|
"loss": 1.0716, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 110, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.7884001135826111, |
|
"learning_rate": 9.704232534421214e-05, |
|
"loss": 1.0656, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 120, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.7752586007118225, |
|
"learning_rate": 9.653238143804181e-05, |
|
"loss": 1.065, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 130, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.5188919901847839, |
|
"learning_rate": 9.60224375318715e-05, |
|
"loss": 1.0606, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 140, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.5289068818092346, |
|
"learning_rate": 9.551249362570118e-05, |
|
"loss": 1.0537, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 150, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.0006146430969238, |
|
"learning_rate": 9.500254971953085e-05, |
|
"loss": 1.0528, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 160, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.5318694710731506, |
|
"learning_rate": 9.449260581336054e-05, |
|
"loss": 1.0357, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 170, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.5409672260284424, |
|
"learning_rate": 9.398266190719021e-05, |
|
"loss": 1.0264, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 180, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.5338054299354553, |
|
"learning_rate": 9.347271800101989e-05, |
|
"loss": 1.0319, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 190, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.5304291844367981, |
|
"learning_rate": 9.296277409484956e-05, |
|
"loss": 1.0301, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 200, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.5799819231033325, |
|
"learning_rate": 9.245283018867925e-05, |
|
"loss": 1.0179, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 210, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.4919432997703552, |
|
"learning_rate": 9.194288628250894e-05, |
|
"loss": 1.0174, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 220, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.5090098977088928, |
|
"learning_rate": 9.14329423763386e-05, |
|
"loss": 1.0261, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 230, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.5532674193382263, |
|
"learning_rate": 9.092299847016829e-05, |
|
"loss": 1.0239, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 240, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.5546780824661255, |
|
"learning_rate": 9.041305456399796e-05, |
|
"loss": 1.0072, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 250, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.5483475923538208, |
|
"learning_rate": 8.990311065782764e-05, |
|
"loss": 1.0121, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 260, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.4962722063064575, |
|
"learning_rate": 8.939316675165733e-05, |
|
"loss": 1.0097, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 270, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.5032678842544556, |
|
"learning_rate": 8.8883222845487e-05, |
|
"loss": 1.0085, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 280, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.48048779368400574, |
|
"learning_rate": 8.837327893931669e-05, |
|
"loss": 1.006, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 290, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.493956595659256, |
|
"learning_rate": 8.786333503314635e-05, |
|
"loss": 0.9991, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 300, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.4832962155342102, |
|
"learning_rate": 8.735339112697604e-05, |
|
"loss": 0.9994, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 310, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.44359833002090454, |
|
"learning_rate": 8.684344722080571e-05, |
|
"loss": 0.9949, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 320, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.432824045419693, |
|
"learning_rate": 8.633350331463539e-05, |
|
"loss": 0.9945, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 330, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.5194958448410034, |
|
"learning_rate": 8.582355940846507e-05, |
|
"loss": 1.0005, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 340, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.4381203353404999, |
|
"learning_rate": 8.531361550229475e-05, |
|
"loss": 0.9971, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 350, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.4479101300239563, |
|
"learning_rate": 8.480367159612444e-05, |
|
"loss": 0.9834, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 360, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.44543156027793884, |
|
"learning_rate": 8.42937276899541e-05, |
|
"loss": 0.9811, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 370, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.46895870566368103, |
|
"learning_rate": 8.378378378378379e-05, |
|
"loss": 0.9969, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 380, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.42161303758621216, |
|
"learning_rate": 8.327383987761347e-05, |
|
"loss": 0.9852, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 390, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.4941897690296173, |
|
"learning_rate": 8.276389597144315e-05, |
|
"loss": 0.9878, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 400, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.4448719918727875, |
|
"learning_rate": 8.225395206527282e-05, |
|
"loss": 0.9956, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 410, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.4166922867298126, |
|
"learning_rate": 8.17440081591025e-05, |
|
"loss": 0.9899, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 420, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.40304499864578247, |
|
"learning_rate": 8.123406425293219e-05, |
|
"loss": 0.9908, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 430, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.43452388048171997, |
|
"learning_rate": 8.072412034676186e-05, |
|
"loss": 0.9705, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 440, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.4060077965259552, |
|
"learning_rate": 8.021417644059154e-05, |
|
"loss": 0.9825, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 450, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.4520680904388428, |
|
"learning_rate": 7.970423253442122e-05, |
|
"loss": 0.9782, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 460, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.4969607889652252, |
|
"learning_rate": 7.91942886282509e-05, |
|
"loss": 0.9798, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 470, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.48629072308540344, |
|
"learning_rate": 7.868434472208057e-05, |
|
"loss": 0.9795, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 480, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.4386264979839325, |
|
"learning_rate": 7.817440081591025e-05, |
|
"loss": 0.9767, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 490, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.44945308566093445, |
|
"learning_rate": 7.766445690973994e-05, |
|
"loss": 0.9673, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.4677426218986511, |
|
"learning_rate": 7.715451300356961e-05, |
|
"loss": 0.9752, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 510, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.4450967013835907, |
|
"learning_rate": 7.664456909739929e-05, |
|
"loss": 0.9708, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 520, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.425359308719635, |
|
"learning_rate": 7.613462519122897e-05, |
|
"loss": 0.9777, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 530, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.427310049533844, |
|
"learning_rate": 7.562468128505865e-05, |
|
"loss": 0.9721, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 540, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.4417431056499481, |
|
"learning_rate": 7.511473737888832e-05, |
|
"loss": 0.9728, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 550, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.4482574164867401, |
|
"learning_rate": 7.460479347271801e-05, |
|
"loss": 0.967, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 560, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.434935063123703, |
|
"learning_rate": 7.409484956654769e-05, |
|
"loss": 0.9715, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 570, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.39001864194869995, |
|
"learning_rate": 7.358490566037736e-05, |
|
"loss": 0.9793, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 580, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.45607784390449524, |
|
"learning_rate": 7.307496175420703e-05, |
|
"loss": 0.9665, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 590, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.539667010307312, |
|
"learning_rate": 7.256501784803672e-05, |
|
"loss": 0.9664, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 600, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.4229834973812103, |
|
"learning_rate": 7.20550739418664e-05, |
|
"loss": 0.9674, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 610, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.4525637626647949, |
|
"learning_rate": 7.154513003569607e-05, |
|
"loss": 0.9671, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 620, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.3813334107398987, |
|
"learning_rate": 7.103518612952576e-05, |
|
"loss": 0.9683, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 630, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.41690248250961304, |
|
"learning_rate": 7.052524222335543e-05, |
|
"loss": 0.9652, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 640, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.4197711944580078, |
|
"learning_rate": 7.001529831718512e-05, |
|
"loss": 0.9776, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 650, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.4112277925014496, |
|
"learning_rate": 6.950535441101478e-05, |
|
"loss": 0.9627, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 660, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.4737640917301178, |
|
"learning_rate": 6.899541050484447e-05, |
|
"loss": 0.9586, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 670, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.4623068869113922, |
|
"learning_rate": 6.848546659867415e-05, |
|
"loss": 0.9637, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 680, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.47191059589385986, |
|
"learning_rate": 6.797552269250382e-05, |
|
"loss": 0.9791, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 690, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.4488503336906433, |
|
"learning_rate": 6.746557878633351e-05, |
|
"loss": 0.9654, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 700, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.4537357985973358, |
|
"learning_rate": 6.695563488016318e-05, |
|
"loss": 0.9652, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 710, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.4568102955818176, |
|
"learning_rate": 6.644569097399287e-05, |
|
"loss": 0.972, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 720, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.37428340315818787, |
|
"learning_rate": 6.593574706782255e-05, |
|
"loss": 0.9531, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 730, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.40930184721946716, |
|
"learning_rate": 6.542580316165222e-05, |
|
"loss": 0.964, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 740, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.419447124004364, |
|
"learning_rate": 6.491585925548191e-05, |
|
"loss": 0.9535, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 750, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.4166411757469177, |
|
"learning_rate": 6.440591534931157e-05, |
|
"loss": 0.9632, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 760, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.43183305859565735, |
|
"learning_rate": 6.389597144314126e-05, |
|
"loss": 0.9623, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 770, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.4423561692237854, |
|
"learning_rate": 6.338602753697093e-05, |
|
"loss": 0.9662, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 780, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.4051528871059418, |
|
"learning_rate": 6.287608363080062e-05, |
|
"loss": 0.9606, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 790, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.41725561022758484, |
|
"learning_rate": 6.23661397246303e-05, |
|
"loss": 0.9626, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 800, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.4825494587421417, |
|
"learning_rate": 6.185619581845997e-05, |
|
"loss": 0.9586, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 810, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.3946501612663269, |
|
"learning_rate": 6.134625191228966e-05, |
|
"loss": 0.9511, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 820, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.4180380702018738, |
|
"learning_rate": 6.0836308006119326e-05, |
|
"loss": 0.9525, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 830, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.4118707478046417, |
|
"learning_rate": 6.032636409994901e-05, |
|
"loss": 0.9545, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 840, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.4261344075202942, |
|
"learning_rate": 5.981642019377869e-05, |
|
"loss": 0.9533, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 850, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.39828041195869446, |
|
"learning_rate": 5.930647628760837e-05, |
|
"loss": 0.9591, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 860, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.40321114659309387, |
|
"learning_rate": 5.879653238143804e-05, |
|
"loss": 0.9541, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 870, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.39925679564476013, |
|
"learning_rate": 5.8286588475267726e-05, |
|
"loss": 0.9518, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 880, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.4243835210800171, |
|
"learning_rate": 5.777664456909741e-05, |
|
"loss": 0.9438, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 890, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.41923442482948303, |
|
"learning_rate": 5.7266700662927075e-05, |
|
"loss": 0.9637, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 900, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.3989606499671936, |
|
"learning_rate": 5.6756756756756757e-05, |
|
"loss": 0.9513, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 910, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.4022904634475708, |
|
"learning_rate": 5.624681285058644e-05, |
|
"loss": 0.9475, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 920, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.4124114513397217, |
|
"learning_rate": 5.573686894441612e-05, |
|
"loss": 0.9436, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 930, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.4305116832256317, |
|
"learning_rate": 5.5226925038245794e-05, |
|
"loss": 0.956, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 940, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.37347981333732605, |
|
"learning_rate": 5.4716981132075475e-05, |
|
"loss": 0.9589, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 950, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.43223634362220764, |
|
"learning_rate": 5.4207037225905157e-05, |
|
"loss": 0.948, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 960, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.38680389523506165, |
|
"learning_rate": 5.369709331973484e-05, |
|
"loss": 0.9474, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 970, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.42318058013916016, |
|
"learning_rate": 5.3187149413564506e-05, |
|
"loss": 0.9474, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 980, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.39201366901397705, |
|
"learning_rate": 5.267720550739419e-05, |
|
"loss": 0.9438, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 990, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.37628376483917236, |
|
"learning_rate": 5.216726160122387e-05, |
|
"loss": 0.9441, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1000, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.43160149455070496, |
|
"learning_rate": 5.165731769505354e-05, |
|
"loss": 0.9521, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1010, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.4002411663532257, |
|
"learning_rate": 5.1147373788883224e-05, |
|
"loss": 0.9425, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1020, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.4099406599998474, |
|
"learning_rate": 5.0637429882712906e-05, |
|
"loss": 0.9522, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1030, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.366636723279953, |
|
"learning_rate": 5.012748597654259e-05, |
|
"loss": 0.9493, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1040, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.39663130044937134, |
|
"learning_rate": 4.961754207037226e-05, |
|
"loss": 0.9458, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1050, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.36909976601600647, |
|
"learning_rate": 4.910759816420194e-05, |
|
"loss": 0.9455, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1060, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.3812944293022156, |
|
"learning_rate": 4.859765425803162e-05, |
|
"loss": 0.9423, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1070, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.38268741965293884, |
|
"learning_rate": 4.80877103518613e-05, |
|
"loss": 0.9487, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1080, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.3832300007343292, |
|
"learning_rate": 4.7577766445690974e-05, |
|
"loss": 0.9455, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1090, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.4103021025657654, |
|
"learning_rate": 4.7067822539520655e-05, |
|
"loss": 0.9353, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1100, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.3691622316837311, |
|
"learning_rate": 4.655787863335033e-05, |
|
"loss": 0.9514, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1110, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.42021074891090393, |
|
"learning_rate": 4.604793472718002e-05, |
|
"loss": 0.9469, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1120, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.4154146909713745, |
|
"learning_rate": 4.553799082100969e-05, |
|
"loss": 0.9367, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1130, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.41266191005706787, |
|
"learning_rate": 4.5028046914839374e-05, |
|
"loss": 0.9333, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1140, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.4136646091938019, |
|
"learning_rate": 4.451810300866905e-05, |
|
"loss": 0.9357, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1150, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.3877258002758026, |
|
"learning_rate": 4.400815910249872e-05, |
|
"loss": 0.9386, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1160, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.3618062734603882, |
|
"learning_rate": 4.3498215196328404e-05, |
|
"loss": 0.953, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1170, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.41884803771972656, |
|
"learning_rate": 4.2988271290158086e-05, |
|
"loss": 0.9491, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1180, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 0.4496184289455414, |
|
"learning_rate": 4.247832738398777e-05, |
|
"loss": 0.9343, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1190, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 0.39843064546585083, |
|
"learning_rate": 4.196838347781744e-05, |
|
"loss": 0.9486, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1200, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.3992258608341217, |
|
"learning_rate": 4.145843957164712e-05, |
|
"loss": 0.9461, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1210, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 0.4257832169532776, |
|
"learning_rate": 4.09484956654768e-05, |
|
"loss": 0.9427, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1220, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 0.4028872847557068, |
|
"learning_rate": 4.043855175930648e-05, |
|
"loss": 0.9443, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1230, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 0.3647397458553314, |
|
"learning_rate": 3.992860785313616e-05, |
|
"loss": 0.9482, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1240, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 0.4042160212993622, |
|
"learning_rate": 3.9418663946965835e-05, |
|
"loss": 0.9424, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1250, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.4318855404853821, |
|
"learning_rate": 3.8908720040795516e-05, |
|
"loss": 0.9392, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1260, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 0.3970791697502136, |
|
"learning_rate": 3.839877613462519e-05, |
|
"loss": 0.9369, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1270, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 0.40742847323417664, |
|
"learning_rate": 3.788883222845487e-05, |
|
"loss": 0.953, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1280, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 0.47150084376335144, |
|
"learning_rate": 3.737888832228455e-05, |
|
"loss": 0.9375, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1290, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 0.3862932324409485, |
|
"learning_rate": 3.6868944416114235e-05, |
|
"loss": 0.9347, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1300, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 0.3804072141647339, |
|
"learning_rate": 3.635900050994391e-05, |
|
"loss": 0.9504, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1310, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 0.3536450266838074, |
|
"learning_rate": 3.5849056603773584e-05, |
|
"loss": 0.9334, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1320, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 0.4137759506702423, |
|
"learning_rate": 3.5339112697603265e-05, |
|
"loss": 0.9424, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1330, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 0.37751680612564087, |
|
"learning_rate": 3.482916879143294e-05, |
|
"loss": 0.9355, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1340, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 0.3878466784954071, |
|
"learning_rate": 3.431922488526262e-05, |
|
"loss": 0.9404, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1350, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 0.3671165406703949, |
|
"learning_rate": 3.38092809790923e-05, |
|
"loss": 0.9448, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1360, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 0.3908054828643799, |
|
"learning_rate": 3.3299337072921984e-05, |
|
"loss": 0.9456, |
|
"max_memory_allocated (GB)": 91.96, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1370, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 0.3817441463470459, |
|
"learning_rate": 3.278939316675166e-05, |
|
"loss": 0.9457, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1380, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 0.3505423069000244, |
|
"learning_rate": 3.227944926058134e-05, |
|
"loss": 0.9355, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1390, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 0.37634551525115967, |
|
"learning_rate": 3.1769505354411015e-05, |
|
"loss": 0.9271, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1400, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 0.3707720935344696, |
|
"learning_rate": 3.1259561448240696e-05, |
|
"loss": 0.9189, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1410, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 0.36191660165786743, |
|
"learning_rate": 3.074961754207038e-05, |
|
"loss": 0.9334, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1420, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 0.3946132957935333, |
|
"learning_rate": 3.0239673635900052e-05, |
|
"loss": 0.9423, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1430, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 0.38084161281585693, |
|
"learning_rate": 2.9729729729729733e-05, |
|
"loss": 0.9311, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1440, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 0.3604285717010498, |
|
"learning_rate": 2.9219785823559408e-05, |
|
"loss": 0.9219, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1450, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 0.3744727373123169, |
|
"learning_rate": 2.8709841917389093e-05, |
|
"loss": 0.9326, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1460, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 0.38844382762908936, |
|
"learning_rate": 2.8199898011218767e-05, |
|
"loss": 0.9332, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1470, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 0.39416611194610596, |
|
"learning_rate": 2.7689954105048445e-05, |
|
"loss": 0.9415, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1480, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 0.35142263770103455, |
|
"learning_rate": 2.7180010198878126e-05, |
|
"loss": 0.9227, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1490, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 0.3765707314014435, |
|
"learning_rate": 2.66700662927078e-05, |
|
"loss": 0.9316, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 0.3772505521774292, |
|
"learning_rate": 2.6160122386537482e-05, |
|
"loss": 0.944, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1510, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.36364176869392395, |
|
"learning_rate": 2.565017848036716e-05, |
|
"loss": 0.9365, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1520, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 0.4053816795349121, |
|
"learning_rate": 2.5140234574196842e-05, |
|
"loss": 0.9345, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1530, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 0.3838883936405182, |
|
"learning_rate": 2.4630290668026516e-05, |
|
"loss": 0.927, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1540, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 0.3805199861526489, |
|
"learning_rate": 2.4120346761856198e-05, |
|
"loss": 0.9301, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1550, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 0.36040347814559937, |
|
"learning_rate": 2.3610402855685876e-05, |
|
"loss": 0.9489, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1560, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 0.35256314277648926, |
|
"learning_rate": 2.3100458949515554e-05, |
|
"loss": 0.9327, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1570, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 0.3900443911552429, |
|
"learning_rate": 2.2590515043345235e-05, |
|
"loss": 0.9378, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1580, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 0.38479581475257874, |
|
"learning_rate": 2.2080571137174913e-05, |
|
"loss": 0.9244, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1590, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 0.375758558511734, |
|
"learning_rate": 2.157062723100459e-05, |
|
"loss": 0.9348, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1600, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 0.3782336413860321, |
|
"learning_rate": 2.106068332483427e-05, |
|
"loss": 0.9289, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1610, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.3771725296974182, |
|
"learning_rate": 2.0550739418663947e-05, |
|
"loss": 0.9457, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1620, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 0.378215491771698, |
|
"learning_rate": 2.0040795512493625e-05, |
|
"loss": 0.9326, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1630, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 0.40586233139038086, |
|
"learning_rate": 1.9530851606323306e-05, |
|
"loss": 0.9244, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1640, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 0.37859204411506653, |
|
"learning_rate": 1.9020907700152984e-05, |
|
"loss": 0.9304, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1650, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 0.3941766619682312, |
|
"learning_rate": 1.8510963793982662e-05, |
|
"loss": 0.9369, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1660, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 0.3579218089580536, |
|
"learning_rate": 1.8001019887812343e-05, |
|
"loss": 0.9351, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1670, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 0.3728456199169159, |
|
"learning_rate": 1.7491075981642018e-05, |
|
"loss": 0.9248, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1680, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 0.39080294966697693, |
|
"learning_rate": 1.69811320754717e-05, |
|
"loss": 0.9437, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1690, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 0.38086754083633423, |
|
"learning_rate": 1.6471188169301377e-05, |
|
"loss": 0.9371, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1700, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 0.37228095531463623, |
|
"learning_rate": 1.5961244263131055e-05, |
|
"loss": 0.934, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1710, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 0.37343358993530273, |
|
"learning_rate": 1.5451300356960737e-05, |
|
"loss": 0.9348, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1720, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 0.39512866735458374, |
|
"learning_rate": 1.4941356450790415e-05, |
|
"loss": 0.9378, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1730, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 0.36341536045074463, |
|
"learning_rate": 1.4431412544620093e-05, |
|
"loss": 0.9203, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1740, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 0.36111101508140564, |
|
"learning_rate": 1.3921468638449772e-05, |
|
"loss": 0.9311, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1750, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 0.34905844926834106, |
|
"learning_rate": 1.3411524732279452e-05, |
|
"loss": 0.9366, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1760, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 0.3797436058521271, |
|
"learning_rate": 1.2901580826109128e-05, |
|
"loss": 0.9294, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1770, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 0.38309234380722046, |
|
"learning_rate": 1.2391636919938808e-05, |
|
"loss": 0.9372, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1780, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 0.3702026605606079, |
|
"learning_rate": 1.1881693013768486e-05, |
|
"loss": 0.9225, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1790, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 0.36598923802375793, |
|
"learning_rate": 1.1371749107598164e-05, |
|
"loss": 0.9242, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1800, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 0.40574029088020325, |
|
"learning_rate": 1.0861805201427844e-05, |
|
"loss": 0.9339, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1810, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 0.35166865587234497, |
|
"learning_rate": 1.0351861295257523e-05, |
|
"loss": 0.9262, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1820, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 0.3604579567909241, |
|
"learning_rate": 9.841917389087201e-06, |
|
"loss": 0.9382, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1830, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 0.3669303059577942, |
|
"learning_rate": 9.33197348291688e-06, |
|
"loss": 0.9334, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1840, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 0.36166125535964966, |
|
"learning_rate": 8.822029576746559e-06, |
|
"loss": 0.9312, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1850, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 0.34307852387428284, |
|
"learning_rate": 8.312085670576237e-06, |
|
"loss": 0.9336, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1860, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 0.3532688021659851, |
|
"learning_rate": 7.802141764405915e-06, |
|
"loss": 0.9259, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1870, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 0.3652610182762146, |
|
"learning_rate": 7.2921978582355945e-06, |
|
"loss": 0.9194, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1880, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 0.34755492210388184, |
|
"learning_rate": 6.782253952065273e-06, |
|
"loss": 0.9336, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1890, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 0.3535468876361847, |
|
"learning_rate": 6.272310045894952e-06, |
|
"loss": 0.9324, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1900, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 0.3971847891807556, |
|
"learning_rate": 5.762366139724631e-06, |
|
"loss": 0.9365, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1910, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 0.3563123643398285, |
|
"learning_rate": 5.252422233554309e-06, |
|
"loss": 0.9358, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1920, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 0.34811973571777344, |
|
"learning_rate": 4.742478327383989e-06, |
|
"loss": 0.933, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1930, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 0.36352258920669556, |
|
"learning_rate": 4.2325344212136666e-06, |
|
"loss": 0.9298, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1940, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 0.3368126153945923, |
|
"learning_rate": 3.7225905150433454e-06, |
|
"loss": 0.9319, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1950, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 0.3323891758918762, |
|
"learning_rate": 3.2126466088730238e-06, |
|
"loss": 0.9384, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1960, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 0.3528982698917389, |
|
"learning_rate": 2.702702702702703e-06, |
|
"loss": 0.9277, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1970, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 0.36531731486320496, |
|
"learning_rate": 2.192758796532382e-06, |
|
"loss": 0.921, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1980, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 0.3461114168167114, |
|
"learning_rate": 1.6828148903620604e-06, |
|
"loss": 0.9348, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1990, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 0.36876726150512695, |
|
"learning_rate": 1.1728709841917388e-06, |
|
"loss": 0.9268, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 2000, |
|
"total_memory_available (GB)": 94.62 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2022, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"total_flos": 2.238964389820498e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|