|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.03797345732829604, |
|
"eval_steps": 1, |
|
"global_step": 392, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 9.687106461300009e-05, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.9688, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 9.687106461300009e-05, |
|
"eval_accuracy": 4.851833655867371e-05, |
|
"eval_loss": 10.96875, |
|
"eval_runtime": 239.4378, |
|
"eval_samples_per_second": 141.026, |
|
"eval_steps_per_second": 2.205, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00019374212922600018, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.9609, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.00019374212922600018, |
|
"eval_accuracy": 4.851833655867371e-05, |
|
"eval_loss": 10.96875, |
|
"eval_runtime": 239.1345, |
|
"eval_samples_per_second": 141.205, |
|
"eval_steps_per_second": 2.208, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0002906131938390003, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.9609, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0002906131938390003, |
|
"eval_accuracy": 4.851833655867371e-05, |
|
"eval_loss": 10.96875, |
|
"eval_runtime": 238.5289, |
|
"eval_samples_per_second": 141.564, |
|
"eval_steps_per_second": 2.214, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.00038748425845200037, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.9609, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.00038748425845200037, |
|
"eval_accuracy": 4.851833655867371e-05, |
|
"eval_loss": 10.96875, |
|
"eval_runtime": 238.6311, |
|
"eval_samples_per_second": 141.503, |
|
"eval_steps_per_second": 2.213, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0004843553230650005, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.9609, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0004843553230650005, |
|
"eval_accuracy": 4.851833655867371e-05, |
|
"eval_loss": 10.96875, |
|
"eval_runtime": 239.4512, |
|
"eval_samples_per_second": 141.018, |
|
"eval_steps_per_second": 2.205, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0005812263876780006, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.9688, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0005812263876780006, |
|
"eval_accuracy": 4.851833655867371e-05, |
|
"eval_loss": 10.96875, |
|
"eval_runtime": 239.7687, |
|
"eval_samples_per_second": 140.832, |
|
"eval_steps_per_second": 2.202, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0006780974522910007, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.9609, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0006780974522910007, |
|
"eval_accuracy": 4.851833655867371e-05, |
|
"eval_loss": 10.96875, |
|
"eval_runtime": 239.881, |
|
"eval_samples_per_second": 140.766, |
|
"eval_steps_per_second": 2.201, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0007749685169040007, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.9609, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0007749685169040007, |
|
"eval_accuracy": 4.851833655867371e-05, |
|
"eval_loss": 10.96875, |
|
"eval_runtime": 238.2807, |
|
"eval_samples_per_second": 141.711, |
|
"eval_steps_per_second": 2.216, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0008718395815170009, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.9688, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0008718395815170009, |
|
"eval_accuracy": 4.851833655867371e-05, |
|
"eval_loss": 10.96875, |
|
"eval_runtime": 238.8621, |
|
"eval_samples_per_second": 141.366, |
|
"eval_steps_per_second": 2.21, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.000968710646130001, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.9531, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.000968710646130001, |
|
"eval_accuracy": 4.851833655867371e-05, |
|
"eval_loss": 10.96875, |
|
"eval_runtime": 238.2707, |
|
"eval_samples_per_second": 141.717, |
|
"eval_steps_per_second": 2.216, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.001065581710743001, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.9688, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.001065581710743001, |
|
"eval_accuracy": 4.851833655867371e-05, |
|
"eval_loss": 10.96875, |
|
"eval_runtime": 238.7639, |
|
"eval_samples_per_second": 141.424, |
|
"eval_steps_per_second": 2.211, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0011624527753560012, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.9688, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0011624527753560012, |
|
"eval_accuracy": 4.851833655867371e-05, |
|
"eval_loss": 10.96875, |
|
"eval_runtime": 236.7358, |
|
"eval_samples_per_second": 142.636, |
|
"eval_steps_per_second": 2.23, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0012593238399690012, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.9531, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0012593238399690012, |
|
"eval_accuracy": 4.851833655867371e-05, |
|
"eval_loss": 10.96875, |
|
"eval_runtime": 238.6214, |
|
"eval_samples_per_second": 141.509, |
|
"eval_steps_per_second": 2.213, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0013561949045820013, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.9609, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0013561949045820013, |
|
"eval_accuracy": 4.851833655867371e-05, |
|
"eval_loss": 10.96875, |
|
"eval_runtime": 238.9769, |
|
"eval_samples_per_second": 141.298, |
|
"eval_steps_per_second": 2.209, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0014530659691950015, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.9688, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0014530659691950015, |
|
"eval_accuracy": 4.851833655867371e-05, |
|
"eval_loss": 10.96875, |
|
"eval_runtime": 238.6023, |
|
"eval_samples_per_second": 141.52, |
|
"eval_steps_per_second": 2.213, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0015499370338080015, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.9766, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0015499370338080015, |
|
"eval_accuracy": 4.851833655867371e-05, |
|
"eval_loss": 10.96875, |
|
"eval_runtime": 238.5522, |
|
"eval_samples_per_second": 141.55, |
|
"eval_steps_per_second": 2.213, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0016468080984210016, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.9688, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0016468080984210016, |
|
"eval_accuracy": 4.851833655867371e-05, |
|
"eval_loss": 10.96875, |
|
"eval_runtime": 239.2771, |
|
"eval_samples_per_second": 141.121, |
|
"eval_steps_per_second": 2.207, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0017436791630340018, |
|
"grad_norm": 3.9638593196868896, |
|
"learning_rate": 9.999990312893539e-06, |
|
"loss": 10.9609, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0017436791630340018, |
|
"eval_accuracy": 0.0007322042282688151, |
|
"eval_loss": 10.8828125, |
|
"eval_runtime": 238.4478, |
|
"eval_samples_per_second": 141.612, |
|
"eval_steps_per_second": 2.214, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0018405502276470018, |
|
"grad_norm": 3.9969630241394043, |
|
"learning_rate": 9.999980625787079e-06, |
|
"loss": 10.8906, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0018405502276470018, |
|
"eval_accuracy": 0.00505525749297823, |
|
"eval_loss": 10.8046875, |
|
"eval_runtime": 237.763, |
|
"eval_samples_per_second": 142.02, |
|
"eval_steps_per_second": 2.221, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.001937421292260002, |
|
"grad_norm": 3.8709299564361572, |
|
"learning_rate": 9.999970938680617e-06, |
|
"loss": 10.8359, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.001937421292260002, |
|
"eval_accuracy": 0.011155251410816828, |
|
"eval_loss": 10.71875, |
|
"eval_runtime": 238.5172, |
|
"eval_samples_per_second": 141.57, |
|
"eval_steps_per_second": 2.214, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.002034292356873002, |
|
"grad_norm": 3.787830352783203, |
|
"learning_rate": 9.999961251574155e-06, |
|
"loss": 10.75, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.002034292356873002, |
|
"eval_accuracy": 0.017461824594575886, |
|
"eval_loss": 10.6484375, |
|
"eval_runtime": 238.2061, |
|
"eval_samples_per_second": 141.755, |
|
"eval_steps_per_second": 2.217, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.002131163421486002, |
|
"grad_norm": 3.8295083045959473, |
|
"learning_rate": 9.999951564467694e-06, |
|
"loss": 10.6719, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.002131163421486002, |
|
"eval_accuracy": 0.02801033625841584, |
|
"eval_loss": 10.578125, |
|
"eval_runtime": 236.835, |
|
"eval_samples_per_second": 142.576, |
|
"eval_steps_per_second": 2.229, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.002228034486099002, |
|
"grad_norm": 3.6312763690948486, |
|
"learning_rate": 9.999941877361234e-06, |
|
"loss": 10.6172, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.002228034486099002, |
|
"eval_accuracy": 0.03920626085709957, |
|
"eval_loss": 10.5, |
|
"eval_runtime": 237.4625, |
|
"eval_samples_per_second": 142.199, |
|
"eval_steps_per_second": 2.224, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0023249055507120024, |
|
"grad_norm": 3.621922492980957, |
|
"learning_rate": 9.999932190254772e-06, |
|
"loss": 10.5391, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0023249055507120024, |
|
"eval_accuracy": 0.044748322853401586, |
|
"eval_loss": 10.4375, |
|
"eval_runtime": 237.2944, |
|
"eval_samples_per_second": 142.3, |
|
"eval_steps_per_second": 2.225, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0024217766153250024, |
|
"grad_norm": 3.4077281951904297, |
|
"learning_rate": 9.99992250314831e-06, |
|
"loss": 10.5078, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0024217766153250024, |
|
"eval_accuracy": 0.04779985410339344, |
|
"eval_loss": 10.3828125, |
|
"eval_runtime": 237.7331, |
|
"eval_samples_per_second": 142.037, |
|
"eval_steps_per_second": 2.221, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0025186476799380023, |
|
"grad_norm": 3.273934841156006, |
|
"learning_rate": 9.99991281604185e-06, |
|
"loss": 10.4609, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0025186476799380023, |
|
"eval_accuracy": 0.0499438956072986, |
|
"eval_loss": 10.3125, |
|
"eval_runtime": 236.731, |
|
"eval_samples_per_second": 142.639, |
|
"eval_steps_per_second": 2.23, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0026155187445510027, |
|
"grad_norm": 3.3044192790985107, |
|
"learning_rate": 9.999903128935388e-06, |
|
"loss": 10.3906, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0026155187445510027, |
|
"eval_accuracy": 0.05109582976502101, |
|
"eval_loss": 10.265625, |
|
"eval_runtime": 235.9963, |
|
"eval_samples_per_second": 143.083, |
|
"eval_steps_per_second": 2.237, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0027123898091640027, |
|
"grad_norm": 3.2339956760406494, |
|
"learning_rate": 9.999893441828926e-06, |
|
"loss": 10.3281, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0027123898091640027, |
|
"eval_accuracy": 0.05213538433890046, |
|
"eval_loss": 10.2109375, |
|
"eval_runtime": 235.9613, |
|
"eval_samples_per_second": 143.104, |
|
"eval_steps_per_second": 2.238, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0028092608737770026, |
|
"grad_norm": 3.157088279724121, |
|
"learning_rate": 9.999883754722464e-06, |
|
"loss": 10.2656, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.0028092608737770026, |
|
"eval_accuracy": 0.053102914079034114, |
|
"eval_loss": 10.1640625, |
|
"eval_runtime": 236.7001, |
|
"eval_samples_per_second": 142.657, |
|
"eval_steps_per_second": 2.231, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.002906131938390003, |
|
"grad_norm": 2.9515769481658936, |
|
"learning_rate": 9.999874067616004e-06, |
|
"loss": 10.25, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.002906131938390003, |
|
"eval_accuracy": 0.05368669735769892, |
|
"eval_loss": 10.1171875, |
|
"eval_runtime": 237.6075, |
|
"eval_samples_per_second": 142.113, |
|
"eval_steps_per_second": 2.222, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.003003003003003003, |
|
"grad_norm": 2.8982396125793457, |
|
"learning_rate": 9.999864380509543e-06, |
|
"loss": 10.2031, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.003003003003003003, |
|
"eval_accuracy": 0.05444738729191865, |
|
"eval_loss": 10.0703125, |
|
"eval_runtime": 236.3017, |
|
"eval_samples_per_second": 142.898, |
|
"eval_steps_per_second": 2.234, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.003099874067616003, |
|
"grad_norm": 2.802349090576172, |
|
"learning_rate": 9.999854693403081e-06, |
|
"loss": 10.1641, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.003099874067616003, |
|
"eval_accuracy": 0.05520547182620385, |
|
"eval_loss": 10.03125, |
|
"eval_runtime": 236.5083, |
|
"eval_samples_per_second": 142.773, |
|
"eval_steps_per_second": 2.232, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.0031967451322290033, |
|
"grad_norm": 2.7052364349365234, |
|
"learning_rate": 9.99984500629662e-06, |
|
"loss": 10.125, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.0031967451322290033, |
|
"eval_accuracy": 0.05577098835643875, |
|
"eval_loss": 9.9921875, |
|
"eval_runtime": 236.8657, |
|
"eval_samples_per_second": 142.558, |
|
"eval_steps_per_second": 2.229, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.0032936161968420033, |
|
"grad_norm": 2.640915632247925, |
|
"learning_rate": 9.99983531919016e-06, |
|
"loss": 10.0859, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0032936161968420033, |
|
"eval_accuracy": 0.0562093903187565, |
|
"eval_loss": 9.9609375, |
|
"eval_runtime": 236.6838, |
|
"eval_samples_per_second": 142.667, |
|
"eval_steps_per_second": 2.231, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0033904872614550032, |
|
"grad_norm": 2.5900304317474365, |
|
"learning_rate": 9.999825632083697e-06, |
|
"loss": 10.0391, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0033904872614550032, |
|
"eval_accuracy": 0.05660072138892365, |
|
"eval_loss": 9.921875, |
|
"eval_runtime": 236.5335, |
|
"eval_samples_per_second": 142.758, |
|
"eval_steps_per_second": 2.232, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0034873583260680036, |
|
"grad_norm": 2.493401288986206, |
|
"learning_rate": 9.999815944977235e-06, |
|
"loss": 10.0156, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.0034873583260680036, |
|
"eval_accuracy": 0.0568301702764917, |
|
"eval_loss": 9.890625, |
|
"eval_runtime": 237.1506, |
|
"eval_samples_per_second": 142.386, |
|
"eval_steps_per_second": 2.226, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.0035842293906810036, |
|
"grad_norm": 2.489300012588501, |
|
"learning_rate": 9.999806257870775e-06, |
|
"loss": 9.9609, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.0035842293906810036, |
|
"eval_accuracy": 0.056685860069006624, |
|
"eval_loss": 9.859375, |
|
"eval_runtime": 236.5729, |
|
"eval_samples_per_second": 142.734, |
|
"eval_steps_per_second": 2.232, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.0036811004552940035, |
|
"grad_norm": 2.457721471786499, |
|
"learning_rate": 9.999796570764313e-06, |
|
"loss": 9.9141, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.0036811004552940035, |
|
"eval_accuracy": 0.05657735963617732, |
|
"eval_loss": 9.8359375, |
|
"eval_runtime": 236.757, |
|
"eval_samples_per_second": 142.623, |
|
"eval_steps_per_second": 2.23, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.003777971519907004, |
|
"grad_norm": 2.4300434589385986, |
|
"learning_rate": 9.999786883657853e-06, |
|
"loss": 9.875, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.003777971519907004, |
|
"eval_accuracy": 0.056777425402261446, |
|
"eval_loss": 9.8046875, |
|
"eval_runtime": 236.7279, |
|
"eval_samples_per_second": 142.641, |
|
"eval_steps_per_second": 2.23, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.003874842584520004, |
|
"grad_norm": 2.4308087825775146, |
|
"learning_rate": 9.99977719655139e-06, |
|
"loss": 9.8672, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.003874842584520004, |
|
"eval_accuracy": 0.05689365518822987, |
|
"eval_loss": 9.78125, |
|
"eval_runtime": 236.8414, |
|
"eval_samples_per_second": 142.572, |
|
"eval_steps_per_second": 2.229, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.003971713649133004, |
|
"grad_norm": 2.248959541320801, |
|
"learning_rate": 9.99976750944493e-06, |
|
"loss": 9.8438, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.003971713649133004, |
|
"eval_accuracy": 0.05678616796648622, |
|
"eval_loss": 9.7578125, |
|
"eval_runtime": 237.8773, |
|
"eval_samples_per_second": 141.951, |
|
"eval_steps_per_second": 2.22, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.004068584713746004, |
|
"grad_norm": 2.3188998699188232, |
|
"learning_rate": 9.999757822338468e-06, |
|
"loss": 9.7969, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.004068584713746004, |
|
"eval_accuracy": 0.05653222831953354, |
|
"eval_loss": 9.734375, |
|
"eval_runtime": 237.6567, |
|
"eval_samples_per_second": 142.083, |
|
"eval_steps_per_second": 2.222, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.004165455778359004, |
|
"grad_norm": 2.1342954635620117, |
|
"learning_rate": 9.999748135232007e-06, |
|
"loss": 9.8203, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.004165455778359004, |
|
"eval_accuracy": 0.056402595198346346, |
|
"eval_loss": 9.7109375, |
|
"eval_runtime": 238.2373, |
|
"eval_samples_per_second": 141.737, |
|
"eval_steps_per_second": 2.216, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.004262326842972004, |
|
"grad_norm": 2.0969340801239014, |
|
"learning_rate": 9.999738448125546e-06, |
|
"loss": 9.7891, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.004262326842972004, |
|
"eval_accuracy": 0.05635836129723557, |
|
"eval_loss": 9.6875, |
|
"eval_runtime": 237.2632, |
|
"eval_samples_per_second": 142.319, |
|
"eval_steps_per_second": 2.225, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.004359197907585004, |
|
"grad_norm": 2.200792074203491, |
|
"learning_rate": 9.999728761019084e-06, |
|
"loss": 9.7031, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.004359197907585004, |
|
"eval_accuracy": 0.05659519215128481, |
|
"eval_loss": 9.671875, |
|
"eval_runtime": 238.714, |
|
"eval_samples_per_second": 141.454, |
|
"eval_steps_per_second": 2.212, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.004456068972198004, |
|
"grad_norm": 2.0903360843658447, |
|
"learning_rate": 9.999719073912622e-06, |
|
"loss": 9.7344, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.004456068972198004, |
|
"eval_accuracy": 0.056926830614062944, |
|
"eval_loss": 9.6484375, |
|
"eval_runtime": 238.1183, |
|
"eval_samples_per_second": 141.808, |
|
"eval_steps_per_second": 2.217, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.004552940036811005, |
|
"grad_norm": 2.027561664581299, |
|
"learning_rate": 9.999709386806162e-06, |
|
"loss": 9.7266, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.004552940036811005, |
|
"eval_accuracy": 0.05733822326372602, |
|
"eval_loss": 9.6328125, |
|
"eval_runtime": 238.4469, |
|
"eval_samples_per_second": 141.612, |
|
"eval_steps_per_second": 2.214, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.004649811101424005, |
|
"grad_norm": 1.9840859174728394, |
|
"learning_rate": 9.9996996996997e-06, |
|
"loss": 9.7031, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.004649811101424005, |
|
"eval_accuracy": 0.057925625153411014, |
|
"eval_loss": 9.6171875, |
|
"eval_runtime": 239.2866, |
|
"eval_samples_per_second": 141.115, |
|
"eval_steps_per_second": 2.207, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.004746682166037005, |
|
"grad_norm": 1.8907567262649536, |
|
"learning_rate": 9.99969001259324e-06, |
|
"loss": 9.7109, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.004746682166037005, |
|
"eval_accuracy": 0.05849756833681777, |
|
"eval_loss": 9.6015625, |
|
"eval_runtime": 237.5376, |
|
"eval_samples_per_second": 142.154, |
|
"eval_steps_per_second": 2.223, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.004843553230650005, |
|
"grad_norm": 1.9789716005325317, |
|
"learning_rate": 9.999680325486778e-06, |
|
"loss": 9.6406, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.004843553230650005, |
|
"eval_accuracy": 0.059110966328071786, |
|
"eval_loss": 9.578125, |
|
"eval_runtime": 238.4868, |
|
"eval_samples_per_second": 141.589, |
|
"eval_steps_per_second": 2.214, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.004940424295263005, |
|
"grad_norm": 1.8518751859664917, |
|
"learning_rate": 9.999670638380316e-06, |
|
"loss": 9.6797, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.004940424295263005, |
|
"eval_accuracy": 0.059745178569913925, |
|
"eval_loss": 9.5625, |
|
"eval_runtime": 237.9086, |
|
"eval_samples_per_second": 141.933, |
|
"eval_steps_per_second": 2.219, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.005037295359876005, |
|
"grad_norm": 1.9054638147354126, |
|
"learning_rate": 9.999660951273856e-06, |
|
"loss": 9.6328, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.005037295359876005, |
|
"eval_accuracy": 0.06045813178755534, |
|
"eval_loss": 9.546875, |
|
"eval_runtime": 238.5328, |
|
"eval_samples_per_second": 141.561, |
|
"eval_steps_per_second": 2.214, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.0051341664244890055, |
|
"grad_norm": 1.9086819887161255, |
|
"learning_rate": 9.999651264167394e-06, |
|
"loss": 9.6172, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.0051341664244890055, |
|
"eval_accuracy": 0.06115973704103745, |
|
"eval_loss": 9.53125, |
|
"eval_runtime": 239.0323, |
|
"eval_samples_per_second": 141.265, |
|
"eval_steps_per_second": 2.209, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.0052310374891020054, |
|
"grad_norm": 1.8585758209228516, |
|
"learning_rate": 9.999641577060932e-06, |
|
"loss": 9.6172, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.0052310374891020054, |
|
"eval_accuracy": 0.06154695736908567, |
|
"eval_loss": 9.5234375, |
|
"eval_runtime": 237.7379, |
|
"eval_samples_per_second": 142.035, |
|
"eval_steps_per_second": 2.221, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.005327908553715005, |
|
"grad_norm": 1.8926104307174683, |
|
"learning_rate": 9.999631889954471e-06, |
|
"loss": 9.5703, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.005327908553715005, |
|
"eval_accuracy": 0.06174404139968916, |
|
"eval_loss": 9.5078125, |
|
"eval_runtime": 238.0821, |
|
"eval_samples_per_second": 141.829, |
|
"eval_steps_per_second": 2.218, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.005424779618328005, |
|
"grad_norm": 1.8241984844207764, |
|
"learning_rate": 9.999622202848011e-06, |
|
"loss": 9.5781, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.005424779618328005, |
|
"eval_accuracy": 0.061843017648313334, |
|
"eval_loss": 9.4921875, |
|
"eval_runtime": 237.2046, |
|
"eval_samples_per_second": 142.354, |
|
"eval_steps_per_second": 2.226, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.005521650682941005, |
|
"grad_norm": 1.7987343072891235, |
|
"learning_rate": 9.999612515741549e-06, |
|
"loss": 9.5938, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.005521650682941005, |
|
"eval_accuracy": 0.0619944782311743, |
|
"eval_loss": 9.4765625, |
|
"eval_runtime": 238.3243, |
|
"eval_samples_per_second": 141.685, |
|
"eval_steps_per_second": 2.215, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.005618521747554005, |
|
"grad_norm": 1.8513528108596802, |
|
"learning_rate": 9.999602828635087e-06, |
|
"loss": 9.5391, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.005618521747554005, |
|
"eval_accuracy": 0.06205043643199048, |
|
"eval_loss": 9.46875, |
|
"eval_runtime": 236.9136, |
|
"eval_samples_per_second": 142.529, |
|
"eval_steps_per_second": 2.229, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.005715392812167006, |
|
"grad_norm": 1.901055932044983, |
|
"learning_rate": 9.999593141528627e-06, |
|
"loss": 9.4922, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.005715392812167006, |
|
"eval_accuracy": 0.06204551512100302, |
|
"eval_loss": 9.453125, |
|
"eval_runtime": 240.0178, |
|
"eval_samples_per_second": 140.685, |
|
"eval_steps_per_second": 2.2, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.005812263876780006, |
|
"grad_norm": 1.8602513074874878, |
|
"learning_rate": 9.999583454422165e-06, |
|
"loss": 9.4688, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.005812263876780006, |
|
"eval_accuracy": 0.062001570708773865, |
|
"eval_loss": 9.4375, |
|
"eval_runtime": 239.415, |
|
"eval_samples_per_second": 141.04, |
|
"eval_steps_per_second": 2.205, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.005909134941393006, |
|
"grad_norm": 1.8280858993530273, |
|
"learning_rate": 9.999573767315703e-06, |
|
"loss": 9.4922, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.005909134941393006, |
|
"eval_accuracy": 0.06202125595272369, |
|
"eval_loss": 9.4296875, |
|
"eval_runtime": 240.2145, |
|
"eval_samples_per_second": 140.57, |
|
"eval_steps_per_second": 2.198, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.006006006006006006, |
|
"grad_norm": 1.8284739255905151, |
|
"learning_rate": 9.999564080209243e-06, |
|
"loss": 9.4609, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.006006006006006006, |
|
"eval_accuracy": 0.06204618094543074, |
|
"eval_loss": 9.4140625, |
|
"eval_runtime": 240.548, |
|
"eval_samples_per_second": 140.375, |
|
"eval_steps_per_second": 2.195, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.006102877070619006, |
|
"grad_norm": 1.8668478727340698, |
|
"learning_rate": 9.99955439310278e-06, |
|
"loss": 9.4297, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.006102877070619006, |
|
"eval_accuracy": 0.061995375646707304, |
|
"eval_loss": 9.40625, |
|
"eval_runtime": 238.875, |
|
"eval_samples_per_second": 141.358, |
|
"eval_steps_per_second": 2.21, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.006199748135232006, |
|
"grad_norm": 1.7616581916809082, |
|
"learning_rate": 9.99954470599632e-06, |
|
"loss": 9.4844, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.006199748135232006, |
|
"eval_accuracy": 0.062048844243141596, |
|
"eval_loss": 9.390625, |
|
"eval_runtime": 238.455, |
|
"eval_samples_per_second": 141.607, |
|
"eval_steps_per_second": 2.214, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.006296619199845007, |
|
"grad_norm": 1.7911943197250366, |
|
"learning_rate": 9.999535018889858e-06, |
|
"loss": 9.4531, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.006296619199845007, |
|
"eval_accuracy": 0.06223356709850013, |
|
"eval_loss": 9.3828125, |
|
"eval_runtime": 236.8696, |
|
"eval_samples_per_second": 142.555, |
|
"eval_steps_per_second": 2.229, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.006393490264458007, |
|
"grad_norm": 1.7897166013717651, |
|
"learning_rate": 9.999525331783398e-06, |
|
"loss": 9.4375, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.006393490264458007, |
|
"eval_accuracy": 0.06252389549787181, |
|
"eval_loss": 9.3671875, |
|
"eval_runtime": 238.3072, |
|
"eval_samples_per_second": 141.695, |
|
"eval_steps_per_second": 2.216, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.006490361329071007, |
|
"grad_norm": 1.7542698383331299, |
|
"learning_rate": 9.999515644676936e-06, |
|
"loss": 9.4375, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.006490361329071007, |
|
"eval_accuracy": 0.06279815726431386, |
|
"eval_loss": 9.359375, |
|
"eval_runtime": 238.8868, |
|
"eval_samples_per_second": 141.351, |
|
"eval_steps_per_second": 2.21, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.0065872323936840066, |
|
"grad_norm": 1.7762055397033691, |
|
"learning_rate": 9.999505957570474e-06, |
|
"loss": 9.3984, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.0065872323936840066, |
|
"eval_accuracy": 0.06302155583425616, |
|
"eval_loss": 9.34375, |
|
"eval_runtime": 238.2147, |
|
"eval_samples_per_second": 141.75, |
|
"eval_steps_per_second": 2.216, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.0066841034582970065, |
|
"grad_norm": 1.7549471855163574, |
|
"learning_rate": 9.999496270464012e-06, |
|
"loss": 9.4062, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.0066841034582970065, |
|
"eval_accuracy": 0.0631751296859529, |
|
"eval_loss": 9.3359375, |
|
"eval_runtime": 238.9606, |
|
"eval_samples_per_second": 141.308, |
|
"eval_steps_per_second": 2.21, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.0067809745229100065, |
|
"grad_norm": 1.7370997667312622, |
|
"learning_rate": 9.999486583357552e-06, |
|
"loss": 9.3984, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0067809745229100065, |
|
"eval_accuracy": 0.06327500335011008, |
|
"eval_loss": 9.3203125, |
|
"eval_runtime": 238.5419, |
|
"eval_samples_per_second": 141.556, |
|
"eval_steps_per_second": 2.213, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.006877845587523007, |
|
"grad_norm": 1.6883249282836914, |
|
"learning_rate": 9.99947689625109e-06, |
|
"loss": 9.4375, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.006877845587523007, |
|
"eval_accuracy": 0.06334149894621705, |
|
"eval_loss": 9.3125, |
|
"eval_runtime": 238.5418, |
|
"eval_samples_per_second": 141.556, |
|
"eval_steps_per_second": 2.213, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.006974716652136007, |
|
"grad_norm": 1.7424453496932983, |
|
"learning_rate": 9.99946720914463e-06, |
|
"loss": 9.3828, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.006974716652136007, |
|
"eval_accuracy": 0.0633868039561898, |
|
"eval_loss": 9.3046875, |
|
"eval_runtime": 237.7375, |
|
"eval_samples_per_second": 142.035, |
|
"eval_steps_per_second": 2.221, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.007071587716749007, |
|
"grad_norm": 1.744436264038086, |
|
"learning_rate": 9.999457522038168e-06, |
|
"loss": 9.3594, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.007071587716749007, |
|
"eval_accuracy": 0.06341806875540422, |
|
"eval_loss": 9.2890625, |
|
"eval_runtime": 238.0776, |
|
"eval_samples_per_second": 141.832, |
|
"eval_steps_per_second": 2.218, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.007168458781362007, |
|
"grad_norm": 1.7263447046279907, |
|
"learning_rate": 9.999447834931707e-06, |
|
"loss": 9.3438, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.007168458781362007, |
|
"eval_accuracy": 0.06344229897479539, |
|
"eval_loss": 9.28125, |
|
"eval_runtime": 239.9491, |
|
"eval_samples_per_second": 140.726, |
|
"eval_steps_per_second": 2.2, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.007265329845975007, |
|
"grad_norm": 1.6880813837051392, |
|
"learning_rate": 9.999438147825245e-06, |
|
"loss": 9.3672, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.007265329845975007, |
|
"eval_accuracy": 0.06342368483970755, |
|
"eval_loss": 9.2734375, |
|
"eval_runtime": 237.9155, |
|
"eval_samples_per_second": 141.929, |
|
"eval_steps_per_second": 2.219, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.007362200910588007, |
|
"grad_norm": 1.7534254789352417, |
|
"learning_rate": 9.999428460718784e-06, |
|
"loss": 9.3125, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.007362200910588007, |
|
"eval_accuracy": 0.06337450067872116, |
|
"eval_loss": 9.2578125, |
|
"eval_runtime": 239.6783, |
|
"eval_samples_per_second": 140.885, |
|
"eval_steps_per_second": 2.203, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.007459071975201007, |
|
"grad_norm": 1.7223541736602783, |
|
"learning_rate": 9.999418773612323e-06, |
|
"loss": 9.3047, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.007459071975201007, |
|
"eval_accuracy": 0.06325945779716735, |
|
"eval_loss": 9.25, |
|
"eval_runtime": 238.6926, |
|
"eval_samples_per_second": 141.466, |
|
"eval_steps_per_second": 2.212, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.007555943039814008, |
|
"grad_norm": 1.7189525365829468, |
|
"learning_rate": 9.999409086505861e-06, |
|
"loss": 9.2969, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.007555943039814008, |
|
"eval_accuracy": 0.0631598736218918, |
|
"eval_loss": 9.2421875, |
|
"eval_runtime": 237.6208, |
|
"eval_samples_per_second": 142.105, |
|
"eval_steps_per_second": 2.222, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.007652814104427008, |
|
"grad_norm": 1.7101792097091675, |
|
"learning_rate": 9.9993993993994e-06, |
|
"loss": 9.2891, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.007652814104427008, |
|
"eval_accuracy": 0.06305487600453004, |
|
"eval_loss": 9.2265625, |
|
"eval_runtime": 239.1168, |
|
"eval_samples_per_second": 141.216, |
|
"eval_steps_per_second": 2.208, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.007749685169040008, |
|
"grad_norm": 1.7495548725128174, |
|
"learning_rate": 9.999389712292939e-06, |
|
"loss": 9.2812, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.007749685169040008, |
|
"eval_accuracy": 0.06309847303010126, |
|
"eval_loss": 9.21875, |
|
"eval_runtime": 238.7884, |
|
"eval_samples_per_second": 141.41, |
|
"eval_steps_per_second": 2.211, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.007846556233653008, |
|
"grad_norm": 1.6896053552627563, |
|
"learning_rate": 9.999380025186479e-06, |
|
"loss": 9.2656, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.007846556233653008, |
|
"eval_accuracy": 0.0631880698389611, |
|
"eval_loss": 9.2109375, |
|
"eval_runtime": 240.0662, |
|
"eval_samples_per_second": 140.657, |
|
"eval_steps_per_second": 2.199, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.007943427298266009, |
|
"grad_norm": 1.7166608572006226, |
|
"learning_rate": 9.999370338080017e-06, |
|
"loss": 9.2422, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.007943427298266009, |
|
"eval_accuracy": 0.06330250479386351, |
|
"eval_loss": 9.203125, |
|
"eval_runtime": 238.0584, |
|
"eval_samples_per_second": 141.843, |
|
"eval_steps_per_second": 2.218, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.008040298362879008, |
|
"grad_norm": 1.6938972473144531, |
|
"learning_rate": 9.999360650973555e-06, |
|
"loss": 9.2656, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.008040298362879008, |
|
"eval_accuracy": 0.06352989831037209, |
|
"eval_loss": 9.1875, |
|
"eval_runtime": 240.419, |
|
"eval_samples_per_second": 140.451, |
|
"eval_steps_per_second": 2.196, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.008137169427492008, |
|
"grad_norm": 1.6488450765609741, |
|
"learning_rate": 9.999350963867095e-06, |
|
"loss": 9.25, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.008137169427492008, |
|
"eval_accuracy": 0.06372730077874536, |
|
"eval_loss": 9.1796875, |
|
"eval_runtime": 239.4979, |
|
"eval_samples_per_second": 140.991, |
|
"eval_steps_per_second": 2.205, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.008234040492105008, |
|
"grad_norm": 1.695817470550537, |
|
"learning_rate": 9.999341276760633e-06, |
|
"loss": 9.2344, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.008234040492105008, |
|
"eval_accuracy": 0.06391222627632101, |
|
"eval_loss": 9.171875, |
|
"eval_runtime": 237.1194, |
|
"eval_samples_per_second": 142.405, |
|
"eval_steps_per_second": 2.227, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.008330911556718008, |
|
"grad_norm": 1.6468489170074463, |
|
"learning_rate": 9.99933158965417e-06, |
|
"loss": 9.2266, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.008330911556718008, |
|
"eval_accuracy": 0.0640310325133358, |
|
"eval_loss": 9.15625, |
|
"eval_runtime": 238.4317, |
|
"eval_samples_per_second": 141.621, |
|
"eval_steps_per_second": 2.214, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.00842778262133101, |
|
"grad_norm": 1.5998897552490234, |
|
"learning_rate": 9.99932190254771e-06, |
|
"loss": 9.25, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.00842778262133101, |
|
"eval_accuracy": 0.06412291628436041, |
|
"eval_loss": 9.1484375, |
|
"eval_runtime": 239.3988, |
|
"eval_samples_per_second": 141.049, |
|
"eval_steps_per_second": 2.206, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.008524653685944008, |
|
"grad_norm": 1.74087655544281, |
|
"learning_rate": 9.999312215441248e-06, |
|
"loss": 9.1406, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.008524653685944008, |
|
"eval_accuracy": 0.06414216729498781, |
|
"eval_loss": 9.140625, |
|
"eval_runtime": 239.1559, |
|
"eval_samples_per_second": 141.192, |
|
"eval_steps_per_second": 2.208, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.008621524750557009, |
|
"grad_norm": 1.723071813583374, |
|
"learning_rate": 9.999302528334788e-06, |
|
"loss": 9.1562, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.008621524750557009, |
|
"eval_accuracy": 0.06415910239456228, |
|
"eval_loss": 9.1328125, |
|
"eval_runtime": 239.2993, |
|
"eval_samples_per_second": 141.108, |
|
"eval_steps_per_second": 2.206, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.008718395815170008, |
|
"grad_norm": 1.605566143989563, |
|
"learning_rate": 9.999292841228326e-06, |
|
"loss": 9.2031, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.008718395815170008, |
|
"eval_accuracy": 0.0641378828595399, |
|
"eval_loss": 9.1171875, |
|
"eval_runtime": 239.3127, |
|
"eval_samples_per_second": 141.1, |
|
"eval_steps_per_second": 2.206, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.008815266879783009, |
|
"grad_norm": 1.699105143547058, |
|
"learning_rate": 9.999283154121864e-06, |
|
"loss": 9.1406, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.008815266879783009, |
|
"eval_accuracy": 0.06416199728337843, |
|
"eval_loss": 9.109375, |
|
"eval_runtime": 239.2239, |
|
"eval_samples_per_second": 141.152, |
|
"eval_steps_per_second": 2.207, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.008912137944396008, |
|
"grad_norm": 1.6730599403381348, |
|
"learning_rate": 9.999273467015404e-06, |
|
"loss": 9.1406, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.008912137944396008, |
|
"eval_accuracy": 0.06426725544073365, |
|
"eval_loss": 9.1015625, |
|
"eval_runtime": 239.2839, |
|
"eval_samples_per_second": 141.117, |
|
"eval_steps_per_second": 2.207, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.009009009009009009, |
|
"grad_norm": 1.6537282466888428, |
|
"learning_rate": 9.999263779908942e-06, |
|
"loss": 9.1406, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.009009009009009009, |
|
"eval_accuracy": 0.06438559849553786, |
|
"eval_loss": 9.09375, |
|
"eval_runtime": 237.9124, |
|
"eval_samples_per_second": 141.93, |
|
"eval_steps_per_second": 2.219, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.00910588007362201, |
|
"grad_norm": 1.648465871810913, |
|
"learning_rate": 9.99925409280248e-06, |
|
"loss": 9.1328, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.00910588007362201, |
|
"eval_accuracy": 0.06439923342186193, |
|
"eval_loss": 9.078125, |
|
"eval_runtime": 239.3262, |
|
"eval_samples_per_second": 141.092, |
|
"eval_steps_per_second": 2.206, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.009202751138235009, |
|
"grad_norm": 1.6511099338531494, |
|
"learning_rate": 9.99924440569602e-06, |
|
"loss": 9.125, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.009202751138235009, |
|
"eval_accuracy": 0.06449600955498583, |
|
"eval_loss": 9.0703125, |
|
"eval_runtime": 238.7857, |
|
"eval_samples_per_second": 141.411, |
|
"eval_steps_per_second": 2.211, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.00929962220284801, |
|
"grad_norm": 1.6301484107971191, |
|
"learning_rate": 9.999234718589558e-06, |
|
"loss": 9.1016, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.00929962220284801, |
|
"eval_accuracy": 0.0646362669181283, |
|
"eval_loss": 9.0625, |
|
"eval_runtime": 238.0729, |
|
"eval_samples_per_second": 141.835, |
|
"eval_steps_per_second": 2.218, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.009396493267461009, |
|
"grad_norm": 1.5811512470245361, |
|
"learning_rate": 9.999225031483096e-06, |
|
"loss": 9.125, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.009396493267461009, |
|
"eval_accuracy": 0.06484397519068705, |
|
"eval_loss": 9.0546875, |
|
"eval_runtime": 238.6104, |
|
"eval_samples_per_second": 141.515, |
|
"eval_steps_per_second": 2.213, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.00949336433207401, |
|
"grad_norm": 1.6737486124038696, |
|
"learning_rate": 9.999215344376635e-06, |
|
"loss": 9.0625, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.00949336433207401, |
|
"eval_accuracy": 0.06518476150212422, |
|
"eval_loss": 9.0390625, |
|
"eval_runtime": 238.844, |
|
"eval_samples_per_second": 141.377, |
|
"eval_steps_per_second": 2.211, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.00959023539668701, |
|
"grad_norm": 1.6142433881759644, |
|
"learning_rate": 9.999205657270175e-06, |
|
"loss": 9.0859, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.00959023539668701, |
|
"eval_accuracy": 0.06548032965025313, |
|
"eval_loss": 9.03125, |
|
"eval_runtime": 238.064, |
|
"eval_samples_per_second": 141.84, |
|
"eval_steps_per_second": 2.218, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.00968710646130001, |
|
"grad_norm": 1.640156865119934, |
|
"learning_rate": 9.999195970163713e-06, |
|
"loss": 9.0547, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.00968710646130001, |
|
"eval_accuracy": 0.06569275659158222, |
|
"eval_loss": 9.0234375, |
|
"eval_runtime": 238.6131, |
|
"eval_samples_per_second": 141.514, |
|
"eval_steps_per_second": 2.213, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.00978397752591301, |
|
"grad_norm": 1.6266826391220093, |
|
"learning_rate": 9.999186283057251e-06, |
|
"loss": 9.0547, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.00978397752591301, |
|
"eval_accuracy": 0.06578165862712619, |
|
"eval_loss": 9.015625, |
|
"eval_runtime": 239.9035, |
|
"eval_samples_per_second": 140.752, |
|
"eval_steps_per_second": 2.201, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.00988084859052601, |
|
"grad_norm": 1.5783159732818604, |
|
"learning_rate": 9.999176595950791e-06, |
|
"loss": 9.0625, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.00988084859052601, |
|
"eval_accuracy": 0.0659039097818322, |
|
"eval_loss": 9.0078125, |
|
"eval_runtime": 239.5936, |
|
"eval_samples_per_second": 140.934, |
|
"eval_steps_per_second": 2.204, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.00997771965513901, |
|
"grad_norm": 1.6107426881790161, |
|
"learning_rate": 9.999166908844329e-06, |
|
"loss": 9.0547, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.00997771965513901, |
|
"eval_accuracy": 0.0660996332146921, |
|
"eval_loss": 8.9921875, |
|
"eval_runtime": 240.4185, |
|
"eval_samples_per_second": 140.451, |
|
"eval_steps_per_second": 2.196, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.01007459071975201, |
|
"grad_norm": 1.6132723093032837, |
|
"learning_rate": 9.999157221737867e-06, |
|
"loss": 9.0156, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.01007459071975201, |
|
"eval_accuracy": 0.06618155856818915, |
|
"eval_loss": 8.984375, |
|
"eval_runtime": 238.4229, |
|
"eval_samples_per_second": 141.627, |
|
"eval_steps_per_second": 2.215, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.01017146178436501, |
|
"grad_norm": 1.594551682472229, |
|
"learning_rate": 9.999147534631407e-06, |
|
"loss": 9.0391, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.01017146178436501, |
|
"eval_accuracy": 0.06635901525261914, |
|
"eval_loss": 8.9765625, |
|
"eval_runtime": 239.2842, |
|
"eval_samples_per_second": 141.117, |
|
"eval_steps_per_second": 2.207, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.010268332848978011, |
|
"grad_norm": 1.6009005308151245, |
|
"learning_rate": 9.999137847524946e-06, |
|
"loss": 9.0234, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.010268332848978011, |
|
"eval_accuracy": 0.06637285282116034, |
|
"eval_loss": 8.96875, |
|
"eval_runtime": 239.6129, |
|
"eval_samples_per_second": 140.923, |
|
"eval_steps_per_second": 2.204, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.01036520391359101, |
|
"grad_norm": 1.5228526592254639, |
|
"learning_rate": 9.999128160418484e-06, |
|
"loss": 9.0234, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.01036520391359101, |
|
"eval_accuracy": 0.06640226489153242, |
|
"eval_loss": 8.9609375, |
|
"eval_runtime": 240.652, |
|
"eval_samples_per_second": 140.315, |
|
"eval_steps_per_second": 2.194, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.010462074978204011, |
|
"grad_norm": 1.6139883995056152, |
|
"learning_rate": 9.999118473312022e-06, |
|
"loss": 8.9766, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.010462074978204011, |
|
"eval_accuracy": 0.06641254174682976, |
|
"eval_loss": 8.9453125, |
|
"eval_runtime": 241.0995, |
|
"eval_samples_per_second": 140.054, |
|
"eval_steps_per_second": 2.19, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.01055894604281701, |
|
"grad_norm": 1.5845446586608887, |
|
"learning_rate": 9.999108786205562e-06, |
|
"loss": 8.9922, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.01055894604281701, |
|
"eval_accuracy": 0.06653754304591111, |
|
"eval_loss": 8.9375, |
|
"eval_runtime": 241.1972, |
|
"eval_samples_per_second": 139.997, |
|
"eval_steps_per_second": 2.189, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.01065581710743001, |
|
"grad_norm": 1.6032410860061646, |
|
"learning_rate": 9.9990990990991e-06, |
|
"loss": 8.9453, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.01065581710743001, |
|
"eval_accuracy": 0.06648474027390454, |
|
"eval_loss": 8.9296875, |
|
"eval_runtime": 240.7345, |
|
"eval_samples_per_second": 140.267, |
|
"eval_steps_per_second": 2.193, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.010752688172043012, |
|
"grad_norm": 1.5953596830368042, |
|
"learning_rate": 9.999089411992638e-06, |
|
"loss": 8.9609, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.010752688172043012, |
|
"eval_accuracy": 0.06636981318790339, |
|
"eval_loss": 8.921875, |
|
"eval_runtime": 241.4759, |
|
"eval_samples_per_second": 139.836, |
|
"eval_steps_per_second": 2.187, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.01084955923665601, |
|
"grad_norm": 1.5376003980636597, |
|
"learning_rate": 9.999079724886176e-06, |
|
"loss": 8.9766, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.01084955923665601, |
|
"eval_accuracy": 0.06635956528149421, |
|
"eval_loss": 8.9140625, |
|
"eval_runtime": 238.6698, |
|
"eval_samples_per_second": 141.48, |
|
"eval_steps_per_second": 2.212, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.010946430301269012, |
|
"grad_norm": 1.5216182470321655, |
|
"learning_rate": 9.999070037779716e-06, |
|
"loss": 8.9844, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.010946430301269012, |
|
"eval_accuracy": 0.06662239223711247, |
|
"eval_loss": 8.8984375, |
|
"eval_runtime": 240.1305, |
|
"eval_samples_per_second": 140.619, |
|
"eval_steps_per_second": 2.199, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.01104330136588201, |
|
"grad_norm": 1.575067400932312, |
|
"learning_rate": 9.999060350673254e-06, |
|
"loss": 8.9453, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.01104330136588201, |
|
"eval_accuracy": 0.06693289801153271, |
|
"eval_loss": 8.890625, |
|
"eval_runtime": 239.4069, |
|
"eval_samples_per_second": 141.044, |
|
"eval_steps_per_second": 2.205, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.011140172430495011, |
|
"grad_norm": 1.509313941001892, |
|
"learning_rate": 9.999050663566794e-06, |
|
"loss": 8.9688, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.011140172430495011, |
|
"eval_accuracy": 0.06726662079425849, |
|
"eval_loss": 8.8828125, |
|
"eval_runtime": 242.3159, |
|
"eval_samples_per_second": 139.351, |
|
"eval_steps_per_second": 2.179, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.01123704349510801, |
|
"grad_norm": 1.48069167137146, |
|
"learning_rate": 9.999040976460332e-06, |
|
"loss": 8.9766, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.01123704349510801, |
|
"eval_accuracy": 0.06765149626236562, |
|
"eval_loss": 8.875, |
|
"eval_runtime": 241.5301, |
|
"eval_samples_per_second": 139.805, |
|
"eval_steps_per_second": 2.186, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.011333914559721011, |
|
"grad_norm": 1.5143989324569702, |
|
"learning_rate": 9.999031289353872e-06, |
|
"loss": 8.9297, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.011333914559721011, |
|
"eval_accuracy": 0.0682274054434505, |
|
"eval_loss": 8.8671875, |
|
"eval_runtime": 241.8791, |
|
"eval_samples_per_second": 139.603, |
|
"eval_steps_per_second": 2.183, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.011430785624334012, |
|
"grad_norm": 1.5148530006408691, |
|
"learning_rate": 9.99902160224741e-06, |
|
"loss": 8.9297, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.011430785624334012, |
|
"eval_accuracy": 0.06885003813002805, |
|
"eval_loss": 8.859375, |
|
"eval_runtime": 240.8979, |
|
"eval_samples_per_second": 140.171, |
|
"eval_steps_per_second": 2.192, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.011527656688947011, |
|
"grad_norm": 1.5727568864822388, |
|
"learning_rate": 9.999011915140948e-06, |
|
"loss": 8.8672, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.011527656688947011, |
|
"eval_accuracy": 0.06944707999947082, |
|
"eval_loss": 8.8515625, |
|
"eval_runtime": 239.842, |
|
"eval_samples_per_second": 140.789, |
|
"eval_steps_per_second": 2.201, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.011624527753560012, |
|
"grad_norm": 1.5209897756576538, |
|
"learning_rate": 9.999002228034487e-06, |
|
"loss": 8.8906, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.011624527753560012, |
|
"eval_accuracy": 0.0699609517132256, |
|
"eval_loss": 8.8359375, |
|
"eval_runtime": 239.7155, |
|
"eval_samples_per_second": 140.863, |
|
"eval_steps_per_second": 2.203, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.011721398818173011, |
|
"grad_norm": 1.4772562980651855, |
|
"learning_rate": 9.998992540928025e-06, |
|
"loss": 8.8984, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.011721398818173011, |
|
"eval_accuracy": 0.07026170171233542, |
|
"eval_loss": 8.828125, |
|
"eval_runtime": 240.3458, |
|
"eval_samples_per_second": 140.493, |
|
"eval_steps_per_second": 2.197, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.011818269882786012, |
|
"grad_norm": 1.4959176778793335, |
|
"learning_rate": 9.998982853821563e-06, |
|
"loss": 8.8984, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.011818269882786012, |
|
"eval_accuracy": 0.07044769831877305, |
|
"eval_loss": 8.8203125, |
|
"eval_runtime": 242.3847, |
|
"eval_samples_per_second": 139.312, |
|
"eval_steps_per_second": 2.178, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.011915140947399011, |
|
"grad_norm": 1.5008248090744019, |
|
"learning_rate": 9.998973166715103e-06, |
|
"loss": 8.8828, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.011915140947399011, |
|
"eval_accuracy": 0.07059994052161438, |
|
"eval_loss": 8.8125, |
|
"eval_runtime": 242.5169, |
|
"eval_samples_per_second": 139.236, |
|
"eval_steps_per_second": 2.177, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.012012012012012012, |
|
"grad_norm": 1.5148494243621826, |
|
"learning_rate": 9.998963479608643e-06, |
|
"loss": 8.8594, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.012012012012012012, |
|
"eval_accuracy": 0.07070221694348897, |
|
"eval_loss": 8.8046875, |
|
"eval_runtime": 241.6035, |
|
"eval_samples_per_second": 139.762, |
|
"eval_steps_per_second": 2.185, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.012108883076625013, |
|
"grad_norm": 1.513382077217102, |
|
"learning_rate": 9.99895379250218e-06, |
|
"loss": 8.8281, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.012108883076625013, |
|
"eval_accuracy": 0.07083503444237392, |
|
"eval_loss": 8.796875, |
|
"eval_runtime": 241.6868, |
|
"eval_samples_per_second": 139.714, |
|
"eval_steps_per_second": 2.185, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.012205754141238012, |
|
"grad_norm": 1.5034332275390625, |
|
"learning_rate": 9.998944105395719e-06, |
|
"loss": 8.8359, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.012205754141238012, |
|
"eval_accuracy": 0.07095690926153383, |
|
"eval_loss": 8.78125, |
|
"eval_runtime": 240.0043, |
|
"eval_samples_per_second": 140.693, |
|
"eval_steps_per_second": 2.2, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.012302625205851013, |
|
"grad_norm": 1.4775885343551636, |
|
"learning_rate": 9.998934418289259e-06, |
|
"loss": 8.8359, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.012302625205851013, |
|
"eval_accuracy": 0.07105356959910508, |
|
"eval_loss": 8.7734375, |
|
"eval_runtime": 238.0369, |
|
"eval_samples_per_second": 141.856, |
|
"eval_steps_per_second": 2.218, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.012399496270464012, |
|
"grad_norm": 1.493371605873108, |
|
"learning_rate": 9.998924731182797e-06, |
|
"loss": 8.8281, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.012399496270464012, |
|
"eval_accuracy": 0.07101825195554806, |
|
"eval_loss": 8.765625, |
|
"eval_runtime": 240.7256, |
|
"eval_samples_per_second": 140.272, |
|
"eval_steps_per_second": 2.193, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.012496367335077013, |
|
"grad_norm": 1.4377527236938477, |
|
"learning_rate": 9.998915044076335e-06, |
|
"loss": 8.8438, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.012496367335077013, |
|
"eval_accuracy": 0.07066889677321507, |
|
"eval_loss": 8.7578125, |
|
"eval_runtime": 241.9868, |
|
"eval_samples_per_second": 139.541, |
|
"eval_steps_per_second": 2.182, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.012593238399690013, |
|
"grad_norm": 1.4930979013442993, |
|
"learning_rate": 9.998905356969873e-06, |
|
"loss": 8.7578, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.012593238399690013, |
|
"eval_accuracy": 0.07018139749657541, |
|
"eval_loss": 8.75, |
|
"eval_runtime": 241.4545, |
|
"eval_samples_per_second": 139.848, |
|
"eval_steps_per_second": 2.187, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.012690109464303012, |
|
"grad_norm": 1.4573019742965698, |
|
"learning_rate": 9.998895669863412e-06, |
|
"loss": 8.7812, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.012690109464303012, |
|
"eval_accuracy": 0.06976317290930623, |
|
"eval_loss": 8.7421875, |
|
"eval_runtime": 241.0814, |
|
"eval_samples_per_second": 140.065, |
|
"eval_steps_per_second": 2.19, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.012786980528916013, |
|
"grad_norm": 1.4660903215408325, |
|
"learning_rate": 9.998885982756952e-06, |
|
"loss": 8.7734, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.012786980528916013, |
|
"eval_accuracy": 0.06973908743435586, |
|
"eval_loss": 8.734375, |
|
"eval_runtime": 240.7814, |
|
"eval_samples_per_second": 140.239, |
|
"eval_steps_per_second": 2.193, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.012883851593529012, |
|
"grad_norm": 1.4869788885116577, |
|
"learning_rate": 9.99887629565049e-06, |
|
"loss": 8.7812, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.012883851593529012, |
|
"eval_accuracy": 0.07012303653804183, |
|
"eval_loss": 8.7265625, |
|
"eval_runtime": 239.4345, |
|
"eval_samples_per_second": 141.028, |
|
"eval_steps_per_second": 2.205, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.012980722658142013, |
|
"grad_norm": 1.41987943649292, |
|
"learning_rate": 9.998866608544028e-06, |
|
"loss": 8.7891, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.012980722658142013, |
|
"eval_accuracy": 0.07073081844499253, |
|
"eval_loss": 8.71875, |
|
"eval_runtime": 238.6401, |
|
"eval_samples_per_second": 141.498, |
|
"eval_steps_per_second": 2.213, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.013077593722755012, |
|
"grad_norm": 1.4336059093475342, |
|
"learning_rate": 9.998856921437568e-06, |
|
"loss": 8.7656, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.013077593722755012, |
|
"eval_accuracy": 0.0712553722984789, |
|
"eval_loss": 8.703125, |
|
"eval_runtime": 239.3711, |
|
"eval_samples_per_second": 141.065, |
|
"eval_steps_per_second": 2.206, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.013174464787368013, |
|
"grad_norm": 1.4126527309417725, |
|
"learning_rate": 9.998847234331106e-06, |
|
"loss": 8.7891, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.013174464787368013, |
|
"eval_accuracy": 0.07193187886592499, |
|
"eval_loss": 8.6953125, |
|
"eval_runtime": 241.9039, |
|
"eval_samples_per_second": 139.588, |
|
"eval_steps_per_second": 2.183, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.013271335851981014, |
|
"grad_norm": 1.4684745073318481, |
|
"learning_rate": 9.998837547224644e-06, |
|
"loss": 8.7188, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.013271335851981014, |
|
"eval_accuracy": 0.0726333393749663, |
|
"eval_loss": 8.6875, |
|
"eval_runtime": 241.6523, |
|
"eval_samples_per_second": 139.734, |
|
"eval_steps_per_second": 2.185, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.013368206916594013, |
|
"grad_norm": 1.434983730316162, |
|
"learning_rate": 9.998827860118184e-06, |
|
"loss": 8.7266, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.013368206916594013, |
|
"eval_accuracy": 0.07333210763740858, |
|
"eval_loss": 8.6796875, |
|
"eval_runtime": 241.7758, |
|
"eval_samples_per_second": 139.662, |
|
"eval_steps_per_second": 2.184, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.013465077981207014, |
|
"grad_norm": 1.4157376289367676, |
|
"learning_rate": 9.998818173011722e-06, |
|
"loss": 8.75, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.013465077981207014, |
|
"eval_accuracy": 0.07374755313141426, |
|
"eval_loss": 8.671875, |
|
"eval_runtime": 239.433, |
|
"eval_samples_per_second": 141.029, |
|
"eval_steps_per_second": 2.205, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.013561949045820013, |
|
"grad_norm": 1.4349616765975952, |
|
"learning_rate": 9.998808485905261e-06, |
|
"loss": 8.7188, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.013561949045820013, |
|
"eval_accuracy": 0.07402439134890268, |
|
"eval_loss": 8.6640625, |
|
"eval_runtime": 240.1811, |
|
"eval_samples_per_second": 140.59, |
|
"eval_steps_per_second": 2.198, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.013658820110433014, |
|
"grad_norm": 1.3990036249160767, |
|
"learning_rate": 9.9987987987988e-06, |
|
"loss": 8.7344, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.013658820110433014, |
|
"eval_accuracy": 0.07422755464602009, |
|
"eval_loss": 8.65625, |
|
"eval_runtime": 239.6139, |
|
"eval_samples_per_second": 140.923, |
|
"eval_steps_per_second": 2.204, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.013755691175046015, |
|
"grad_norm": 1.4333348274230957, |
|
"learning_rate": 9.99878911169234e-06, |
|
"loss": 8.6641, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.013755691175046015, |
|
"eval_accuracy": 0.0741873156914756, |
|
"eval_loss": 8.6484375, |
|
"eval_runtime": 240.5249, |
|
"eval_samples_per_second": 140.389, |
|
"eval_steps_per_second": 2.195, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.013852562239659014, |
|
"grad_norm": 1.395568609237671, |
|
"learning_rate": 9.998779424585877e-06, |
|
"loss": 8.7031, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.013852562239659014, |
|
"eval_accuracy": 0.07413063376845538, |
|
"eval_loss": 8.640625, |
|
"eval_runtime": 241.0655, |
|
"eval_samples_per_second": 140.074, |
|
"eval_steps_per_second": 2.19, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.013949433304272014, |
|
"grad_norm": 1.4352083206176758, |
|
"learning_rate": 9.998769737479415e-06, |
|
"loss": 8.6797, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.013949433304272014, |
|
"eval_accuracy": 0.07412177540867797, |
|
"eval_loss": 8.6328125, |
|
"eval_runtime": 241.4457, |
|
"eval_samples_per_second": 139.853, |
|
"eval_steps_per_second": 2.187, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.014046304368885014, |
|
"grad_norm": 1.3976655006408691, |
|
"learning_rate": 9.998760050372955e-06, |
|
"loss": 8.6797, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.014046304368885014, |
|
"eval_accuracy": 0.07388228125691788, |
|
"eval_loss": 8.6171875, |
|
"eval_runtime": 242.6627, |
|
"eval_samples_per_second": 139.152, |
|
"eval_steps_per_second": 2.176, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.014143175433498014, |
|
"grad_norm": 1.4085290431976318, |
|
"learning_rate": 9.998750363266493e-06, |
|
"loss": 8.6719, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.014143175433498014, |
|
"eval_accuracy": 0.07361215918148292, |
|
"eval_loss": 8.609375, |
|
"eval_runtime": 240.5995, |
|
"eval_samples_per_second": 140.345, |
|
"eval_steps_per_second": 2.195, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.014240046498111014, |
|
"grad_norm": 1.4239732027053833, |
|
"learning_rate": 9.998740676160031e-06, |
|
"loss": 8.6641, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.014240046498111014, |
|
"eval_accuracy": 0.07357530724685334, |
|
"eval_loss": 8.6015625, |
|
"eval_runtime": 240.087, |
|
"eval_samples_per_second": 140.645, |
|
"eval_steps_per_second": 2.199, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.014336917562724014, |
|
"grad_norm": 1.4121946096420288, |
|
"learning_rate": 9.99873098905357e-06, |
|
"loss": 8.6484, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.014336917562724014, |
|
"eval_accuracy": 0.07371012221902144, |
|
"eval_loss": 8.59375, |
|
"eval_runtime": 240.6261, |
|
"eval_samples_per_second": 140.33, |
|
"eval_steps_per_second": 2.194, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.014433788627337015, |
|
"grad_norm": 1.4172465801239014, |
|
"learning_rate": 9.99872130194711e-06, |
|
"loss": 8.6172, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.014433788627337015, |
|
"eval_accuracy": 0.07406176436351918, |
|
"eval_loss": 8.5859375, |
|
"eval_runtime": 238.2968, |
|
"eval_samples_per_second": 141.701, |
|
"eval_steps_per_second": 2.216, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.014530659691950014, |
|
"grad_norm": 1.366936445236206, |
|
"learning_rate": 9.998711614840649e-06, |
|
"loss": 8.6719, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.014530659691950014, |
|
"eval_accuracy": 0.0746068140298239, |
|
"eval_loss": 8.578125, |
|
"eval_runtime": 241.6018, |
|
"eval_samples_per_second": 139.763, |
|
"eval_steps_per_second": 2.185, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.014627530756563015, |
|
"grad_norm": 1.3992184400558472, |
|
"learning_rate": 9.998701927734187e-06, |
|
"loss": 8.6406, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.014627530756563015, |
|
"eval_accuracy": 0.07503204424802817, |
|
"eval_loss": 8.5703125, |
|
"eval_runtime": 242.6844, |
|
"eval_samples_per_second": 139.14, |
|
"eval_steps_per_second": 2.176, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.014724401821176014, |
|
"grad_norm": 1.3632800579071045, |
|
"learning_rate": 9.998692240627725e-06, |
|
"loss": 8.6172, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.014724401821176014, |
|
"eval_accuracy": 0.07536941459066229, |
|
"eval_loss": 8.5625, |
|
"eval_runtime": 242.1383, |
|
"eval_samples_per_second": 139.453, |
|
"eval_steps_per_second": 2.181, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.014821272885789015, |
|
"grad_norm": 1.3528015613555908, |
|
"learning_rate": 9.998682553521264e-06, |
|
"loss": 8.6094, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.014821272885789015, |
|
"eval_accuracy": 0.07561990931992374, |
|
"eval_loss": 8.5546875, |
|
"eval_runtime": 241.5544, |
|
"eval_samples_per_second": 139.79, |
|
"eval_steps_per_second": 2.186, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.014918143950402014, |
|
"grad_norm": 1.3605914115905762, |
|
"learning_rate": 9.998672866414802e-06, |
|
"loss": 8.6016, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.014918143950402014, |
|
"eval_accuracy": 0.07561860661995648, |
|
"eval_loss": 8.546875, |
|
"eval_runtime": 241.8113, |
|
"eval_samples_per_second": 139.642, |
|
"eval_steps_per_second": 2.184, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.015015015015015015, |
|
"grad_norm": 1.3871914148330688, |
|
"learning_rate": 9.99866317930834e-06, |
|
"loss": 8.5625, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.015015015015015015, |
|
"eval_accuracy": 0.07551598281142397, |
|
"eval_loss": 8.5390625, |
|
"eval_runtime": 239.4463, |
|
"eval_samples_per_second": 141.021, |
|
"eval_steps_per_second": 2.205, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.015111886079628016, |
|
"grad_norm": 1.3802464008331299, |
|
"learning_rate": 9.99865349220188e-06, |
|
"loss": 8.5312, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.015111886079628016, |
|
"eval_accuracy": 0.07555937719477805, |
|
"eval_loss": 8.53125, |
|
"eval_runtime": 240.4355, |
|
"eval_samples_per_second": 140.441, |
|
"eval_steps_per_second": 2.196, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.015208757144241015, |
|
"grad_norm": 1.372429609298706, |
|
"learning_rate": 9.99864380509542e-06, |
|
"loss": 8.5703, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.015208757144241015, |
|
"eval_accuracy": 0.07562842029304323, |
|
"eval_loss": 8.5234375, |
|
"eval_runtime": 241.532, |
|
"eval_samples_per_second": 139.803, |
|
"eval_steps_per_second": 2.186, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.015305628208854016, |
|
"grad_norm": 1.2991220951080322, |
|
"learning_rate": 9.998634117988958e-06, |
|
"loss": 8.6172, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.015305628208854016, |
|
"eval_accuracy": 0.07567942823398378, |
|
"eval_loss": 8.515625, |
|
"eval_runtime": 240.874, |
|
"eval_samples_per_second": 140.185, |
|
"eval_steps_per_second": 2.192, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.015402499273467015, |
|
"grad_norm": 1.3360435962677002, |
|
"learning_rate": 9.998624430882496e-06, |
|
"loss": 8.5781, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.015402499273467015, |
|
"eval_accuracy": 0.07568432059608309, |
|
"eval_loss": 8.5078125, |
|
"eval_runtime": 242.8721, |
|
"eval_samples_per_second": 139.032, |
|
"eval_steps_per_second": 2.174, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.015499370338080016, |
|
"grad_norm": 1.323406457901001, |
|
"learning_rate": 9.998614743776036e-06, |
|
"loss": 8.6016, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.015499370338080016, |
|
"eval_accuracy": 0.0758624431049408, |
|
"eval_loss": 8.5, |
|
"eval_runtime": 241.6758, |
|
"eval_samples_per_second": 139.72, |
|
"eval_steps_per_second": 2.185, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.015596241402693016, |
|
"grad_norm": 1.318803310394287, |
|
"learning_rate": 9.998605056669574e-06, |
|
"loss": 8.5547, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.015596241402693016, |
|
"eval_accuracy": 0.076235681120007, |
|
"eval_loss": 8.4921875, |
|
"eval_runtime": 241.1344, |
|
"eval_samples_per_second": 140.034, |
|
"eval_steps_per_second": 2.19, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.015693112467306015, |
|
"grad_norm": 1.3205766677856445, |
|
"learning_rate": 9.998595369563112e-06, |
|
"loss": 8.5547, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.015693112467306015, |
|
"eval_accuracy": 0.07658955232889318, |
|
"eval_loss": 8.484375, |
|
"eval_runtime": 241.2454, |
|
"eval_samples_per_second": 139.97, |
|
"eval_steps_per_second": 2.189, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.015789983531919016, |
|
"grad_norm": 1.3163981437683105, |
|
"learning_rate": 9.998585682456651e-06, |
|
"loss": 8.5312, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.015789983531919016, |
|
"eval_accuracy": 0.07672393306773886, |
|
"eval_loss": 8.4765625, |
|
"eval_runtime": 239.2463, |
|
"eval_samples_per_second": 141.139, |
|
"eval_steps_per_second": 2.207, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.015886854596532017, |
|
"grad_norm": 1.330255389213562, |
|
"learning_rate": 9.99857599535019e-06, |
|
"loss": 8.5, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.015886854596532017, |
|
"eval_accuracy": 0.07668372306208254, |
|
"eval_loss": 8.46875, |
|
"eval_runtime": 242.9928, |
|
"eval_samples_per_second": 138.963, |
|
"eval_steps_per_second": 2.173, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.015983725661145014, |
|
"grad_norm": 1.3156908750534058, |
|
"learning_rate": 9.998566308243727e-06, |
|
"loss": 8.5312, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.015983725661145014, |
|
"eval_accuracy": 0.07662301724360787, |
|
"eval_loss": 8.4609375, |
|
"eval_runtime": 242.8743, |
|
"eval_samples_per_second": 139.031, |
|
"eval_steps_per_second": 2.174, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.016080596725758015, |
|
"grad_norm": 1.3243170976638794, |
|
"learning_rate": 9.998556621137267e-06, |
|
"loss": 8.5312, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.016080596725758015, |
|
"eval_accuracy": 0.07658451522235309, |
|
"eval_loss": 8.453125, |
|
"eval_runtime": 242.3105, |
|
"eval_samples_per_second": 139.354, |
|
"eval_steps_per_second": 2.179, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.016177467790371016, |
|
"grad_norm": 1.343295931816101, |
|
"learning_rate": 9.998546934030807e-06, |
|
"loss": 8.4531, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.016177467790371016, |
|
"eval_accuracy": 0.07665937704713872, |
|
"eval_loss": 8.4453125, |
|
"eval_runtime": 242.1425, |
|
"eval_samples_per_second": 139.451, |
|
"eval_steps_per_second": 2.181, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.016274338854984017, |
|
"grad_norm": 1.3232625722885132, |
|
"learning_rate": 9.998537246924345e-06, |
|
"loss": 8.4766, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.016274338854984017, |
|
"eval_accuracy": 0.07678096237741702, |
|
"eval_loss": 8.4375, |
|
"eval_runtime": 241.0831, |
|
"eval_samples_per_second": 140.064, |
|
"eval_steps_per_second": 2.19, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.016371209919597018, |
|
"grad_norm": 1.3026939630508423, |
|
"learning_rate": 9.998527559817883e-06, |
|
"loss": 8.4766, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.016371209919597018, |
|
"eval_accuracy": 0.07695766639075481, |
|
"eval_loss": 8.4296875, |
|
"eval_runtime": 241.0848, |
|
"eval_samples_per_second": 140.063, |
|
"eval_steps_per_second": 2.19, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.016468080984210015, |
|
"grad_norm": 1.3087149858474731, |
|
"learning_rate": 9.998517872711423e-06, |
|
"loss": 8.4688, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.016468080984210015, |
|
"eval_accuracy": 0.0772238514173998, |
|
"eval_loss": 8.421875, |
|
"eval_runtime": 239.3699, |
|
"eval_samples_per_second": 141.066, |
|
"eval_steps_per_second": 2.206, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.016564952048823016, |
|
"grad_norm": 1.2863601446151733, |
|
"learning_rate": 9.99850818560496e-06, |
|
"loss": 8.4922, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.016564952048823016, |
|
"eval_accuracy": 0.077486128344143, |
|
"eval_loss": 8.4140625, |
|
"eval_runtime": 240.0556, |
|
"eval_samples_per_second": 140.663, |
|
"eval_steps_per_second": 2.199, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.016661823113436017, |
|
"grad_norm": 1.267603874206543, |
|
"learning_rate": 9.998498498498499e-06, |
|
"loss": 8.4375, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.016661823113436017, |
|
"eval_accuracy": 0.07766749312847479, |
|
"eval_loss": 8.4140625, |
|
"eval_runtime": 240.7221, |
|
"eval_samples_per_second": 140.274, |
|
"eval_steps_per_second": 2.193, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.016758694178049018, |
|
"grad_norm": 1.27737295627594, |
|
"learning_rate": 9.998488811392037e-06, |
|
"loss": 8.4609, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.016758694178049018, |
|
"eval_accuracy": 0.07765209231997287, |
|
"eval_loss": 8.40625, |
|
"eval_runtime": 241.2721, |
|
"eval_samples_per_second": 139.954, |
|
"eval_steps_per_second": 2.188, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.01685556524266202, |
|
"grad_norm": 1.3177382946014404, |
|
"learning_rate": 9.998479124285576e-06, |
|
"loss": 8.4141, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.01685556524266202, |
|
"eval_accuracy": 0.07766830369734332, |
|
"eval_loss": 8.3984375, |
|
"eval_runtime": 241.2269, |
|
"eval_samples_per_second": 139.98, |
|
"eval_steps_per_second": 2.189, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.016952436307275016, |
|
"grad_norm": 1.2761859893798828, |
|
"learning_rate": 9.998469437179116e-06, |
|
"loss": 8.4531, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.016952436307275016, |
|
"eval_accuracy": 0.0777974157385436, |
|
"eval_loss": 8.390625, |
|
"eval_runtime": 242.9014, |
|
"eval_samples_per_second": 139.015, |
|
"eval_steps_per_second": 2.174, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.017049307371888017, |
|
"grad_norm": 1.25885808467865, |
|
"learning_rate": 9.998459750072654e-06, |
|
"loss": 8.3984, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.017049307371888017, |
|
"eval_accuracy": 0.07782648042225775, |
|
"eval_loss": 8.3828125, |
|
"eval_runtime": 240.1918, |
|
"eval_samples_per_second": 140.584, |
|
"eval_steps_per_second": 2.198, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.017146178436501017, |
|
"grad_norm": 1.258347988128662, |
|
"learning_rate": 9.998450062966192e-06, |
|
"loss": 8.4141, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.017146178436501017, |
|
"eval_accuracy": 0.07790733466689281, |
|
"eval_loss": 8.375, |
|
"eval_runtime": 239.9645, |
|
"eval_samples_per_second": 140.717, |
|
"eval_steps_per_second": 2.2, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.017243049501114018, |
|
"grad_norm": 1.2390292882919312, |
|
"learning_rate": 9.998440375859732e-06, |
|
"loss": 8.4453, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.017243049501114018, |
|
"eval_accuracy": 0.07808157802473688, |
|
"eval_loss": 8.3671875, |
|
"eval_runtime": 238.5786, |
|
"eval_samples_per_second": 141.534, |
|
"eval_steps_per_second": 2.213, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.01733992056572702, |
|
"grad_norm": 1.2472869157791138, |
|
"learning_rate": 9.99843068875327e-06, |
|
"loss": 8.4219, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.01733992056572702, |
|
"eval_accuracy": 0.07831968262986522, |
|
"eval_loss": 8.359375, |
|
"eval_runtime": 240.78, |
|
"eval_samples_per_second": 140.24, |
|
"eval_steps_per_second": 2.193, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.017436791630340016, |
|
"grad_norm": 1.2041652202606201, |
|
"learning_rate": 9.998421001646808e-06, |
|
"loss": 8.4219, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.017436791630340016, |
|
"eval_accuracy": 0.07850064212976275, |
|
"eval_loss": 8.3515625, |
|
"eval_runtime": 243.4958, |
|
"eval_samples_per_second": 138.676, |
|
"eval_steps_per_second": 2.168, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.017533662694953017, |
|
"grad_norm": 1.1937860250473022, |
|
"learning_rate": 9.998411314540348e-06, |
|
"loss": 8.4062, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.017533662694953017, |
|
"eval_accuracy": 0.07853622031331324, |
|
"eval_loss": 8.34375, |
|
"eval_runtime": 242.9108, |
|
"eval_samples_per_second": 139.01, |
|
"eval_steps_per_second": 2.174, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.017630533759566018, |
|
"grad_norm": 1.2295143604278564, |
|
"learning_rate": 9.998401627433887e-06, |
|
"loss": 8.3984, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.017630533759566018, |
|
"eval_accuracy": 0.07868744930506891, |
|
"eval_loss": 8.3359375, |
|
"eval_runtime": 242.8021, |
|
"eval_samples_per_second": 139.072, |
|
"eval_steps_per_second": 2.175, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.01772740482417902, |
|
"grad_norm": 1.229699730873108, |
|
"learning_rate": 9.998391940327426e-06, |
|
"loss": 8.3828, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.01772740482417902, |
|
"eval_accuracy": 0.0789600320359976, |
|
"eval_loss": 8.328125, |
|
"eval_runtime": 242.7645, |
|
"eval_samples_per_second": 139.094, |
|
"eval_steps_per_second": 2.175, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.017824275888792016, |
|
"grad_norm": 1.2546368837356567, |
|
"learning_rate": 9.998382253220964e-06, |
|
"loss": 8.375, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.017824275888792016, |
|
"eval_accuracy": 0.07920493962984389, |
|
"eval_loss": 8.3203125, |
|
"eval_runtime": 239.9494, |
|
"eval_samples_per_second": 140.726, |
|
"eval_steps_per_second": 2.2, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.017921146953405017, |
|
"grad_norm": 1.2304656505584717, |
|
"learning_rate": 9.998372566114503e-06, |
|
"loss": 8.3594, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.017921146953405017, |
|
"eval_accuracy": 0.07946394533222483, |
|
"eval_loss": 8.3125, |
|
"eval_runtime": 239.6503, |
|
"eval_samples_per_second": 140.901, |
|
"eval_steps_per_second": 2.203, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.018018018018018018, |
|
"grad_norm": 1.1984411478042603, |
|
"learning_rate": 9.998362879008041e-06, |
|
"loss": 8.375, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.018018018018018018, |
|
"eval_accuracy": 0.07966461902496035, |
|
"eval_loss": 8.3125, |
|
"eval_runtime": 240.948, |
|
"eval_samples_per_second": 140.142, |
|
"eval_steps_per_second": 2.191, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.01811488908263102, |
|
"grad_norm": 1.2182241678237915, |
|
"learning_rate": 9.99835319190158e-06, |
|
"loss": 8.3125, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.01811488908263102, |
|
"eval_accuracy": 0.07963677019454897, |
|
"eval_loss": 8.3046875, |
|
"eval_runtime": 241.9831, |
|
"eval_samples_per_second": 139.543, |
|
"eval_steps_per_second": 2.182, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.01821176014724402, |
|
"grad_norm": 1.2160634994506836, |
|
"learning_rate": 9.998343504795119e-06, |
|
"loss": 8.3438, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.01821176014724402, |
|
"eval_accuracy": 0.07958732549356913, |
|
"eval_loss": 8.296875, |
|
"eval_runtime": 243.0735, |
|
"eval_samples_per_second": 138.917, |
|
"eval_steps_per_second": 2.172, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.018308631211857017, |
|
"grad_norm": 1.2408748865127563, |
|
"learning_rate": 9.998333817688657e-06, |
|
"loss": 8.3281, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.018308631211857017, |
|
"eval_accuracy": 0.0795488524212025, |
|
"eval_loss": 8.2890625, |
|
"eval_runtime": 242.5702, |
|
"eval_samples_per_second": 139.205, |
|
"eval_steps_per_second": 2.177, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.018405502276470018, |
|
"grad_norm": 1.1811424493789673, |
|
"learning_rate": 9.998324130582195e-06, |
|
"loss": 8.3359, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.018405502276470018, |
|
"eval_accuracy": 0.07950351846234159, |
|
"eval_loss": 8.28125, |
|
"eval_runtime": 241.8027, |
|
"eval_samples_per_second": 139.647, |
|
"eval_steps_per_second": 2.184, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.01850237334108302, |
|
"grad_norm": 1.2242190837860107, |
|
"learning_rate": 9.998314443475735e-06, |
|
"loss": 8.3047, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.01850237334108302, |
|
"eval_accuracy": 0.07975195782054359, |
|
"eval_loss": 8.2734375, |
|
"eval_runtime": 241.7121, |
|
"eval_samples_per_second": 139.699, |
|
"eval_steps_per_second": 2.184, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.01859924440569602, |
|
"grad_norm": 1.2018407583236694, |
|
"learning_rate": 9.998304756369273e-06, |
|
"loss": 8.3359, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.01859924440569602, |
|
"eval_accuracy": 0.08004521005761958, |
|
"eval_loss": 8.265625, |
|
"eval_runtime": 241.0872, |
|
"eval_samples_per_second": 140.061, |
|
"eval_steps_per_second": 2.19, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.01869611547030902, |
|
"grad_norm": 1.1608692407608032, |
|
"learning_rate": 9.998295069262813e-06, |
|
"loss": 8.3047, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.01869611547030902, |
|
"eval_accuracy": 0.08031692432190342, |
|
"eval_loss": 8.2578125, |
|
"eval_runtime": 241.524, |
|
"eval_samples_per_second": 139.808, |
|
"eval_steps_per_second": 2.186, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.018792986534922018, |
|
"grad_norm": 1.1747246980667114, |
|
"learning_rate": 9.99828538215635e-06, |
|
"loss": 8.2969, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.018792986534922018, |
|
"eval_accuracy": 0.08048132505777257, |
|
"eval_loss": 8.2578125, |
|
"eval_runtime": 239.0651, |
|
"eval_samples_per_second": 141.246, |
|
"eval_steps_per_second": 2.209, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.01888985759953502, |
|
"grad_norm": 1.14412260055542, |
|
"learning_rate": 9.998275695049889e-06, |
|
"loss": 8.3203, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.01888985759953502, |
|
"eval_accuracy": 0.08070339197885944, |
|
"eval_loss": 8.25, |
|
"eval_runtime": 241.1404, |
|
"eval_samples_per_second": 140.03, |
|
"eval_steps_per_second": 2.19, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.01898672866414802, |
|
"grad_norm": 1.1623828411102295, |
|
"learning_rate": 9.998266007943428e-06, |
|
"loss": 8.2734, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.01898672866414802, |
|
"eval_accuracy": 0.08091521099353713, |
|
"eval_loss": 8.2421875, |
|
"eval_runtime": 240.6022, |
|
"eval_samples_per_second": 140.344, |
|
"eval_steps_per_second": 2.194, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.01908359972876102, |
|
"grad_norm": 1.158718466758728, |
|
"learning_rate": 9.998256320836966e-06, |
|
"loss": 8.25, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.01908359972876102, |
|
"eval_accuracy": 0.08093220399088794, |
|
"eval_loss": 8.234375, |
|
"eval_runtime": 239.6707, |
|
"eval_samples_per_second": 140.889, |
|
"eval_steps_per_second": 2.203, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.01918047079337402, |
|
"grad_norm": 1.1638875007629395, |
|
"learning_rate": 9.998246633730504e-06, |
|
"loss": 8.2734, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.01918047079337402, |
|
"eval_accuracy": 0.08096642157669483, |
|
"eval_loss": 8.2265625, |
|
"eval_runtime": 239.016, |
|
"eval_samples_per_second": 141.275, |
|
"eval_steps_per_second": 2.209, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.019277341857987018, |
|
"grad_norm": 1.2000994682312012, |
|
"learning_rate": 9.998236946624044e-06, |
|
"loss": 8.2109, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.019277341857987018, |
|
"eval_accuracy": 0.08091442937355676, |
|
"eval_loss": 8.21875, |
|
"eval_runtime": 238.0069, |
|
"eval_samples_per_second": 141.874, |
|
"eval_steps_per_second": 2.218, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.01937421292260002, |
|
"grad_norm": 1.147640347480774, |
|
"learning_rate": 9.998227259517584e-06, |
|
"loss": 8.25, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01937421292260002, |
|
"eval_accuracy": 0.08094085970844822, |
|
"eval_loss": 8.2109375, |
|
"eval_runtime": 239.3683, |
|
"eval_samples_per_second": 141.067, |
|
"eval_steps_per_second": 2.206, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01947108398721302, |
|
"grad_norm": 1.143306851387024, |
|
"learning_rate": 9.998217572411122e-06, |
|
"loss": 8.2734, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.01947108398721302, |
|
"eval_accuracy": 0.08100220240246243, |
|
"eval_loss": 8.203125, |
|
"eval_runtime": 240.6044, |
|
"eval_samples_per_second": 140.342, |
|
"eval_steps_per_second": 2.194, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.01956795505182602, |
|
"grad_norm": 1.1851661205291748, |
|
"learning_rate": 9.99820788530466e-06, |
|
"loss": 8.2188, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.01956795505182602, |
|
"eval_accuracy": 0.08122783003679317, |
|
"eval_loss": 8.203125, |
|
"eval_runtime": 239.3597, |
|
"eval_samples_per_second": 141.072, |
|
"eval_steps_per_second": 2.206, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.019664826116439018, |
|
"grad_norm": 1.1528866291046143, |
|
"learning_rate": 9.9981981981982e-06, |
|
"loss": 8.2578, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.019664826116439018, |
|
"eval_accuracy": 0.08159895478302359, |
|
"eval_loss": 8.1953125, |
|
"eval_runtime": 239.5354, |
|
"eval_samples_per_second": 140.969, |
|
"eval_steps_per_second": 2.204, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.01976169718105202, |
|
"grad_norm": 1.121721625328064, |
|
"learning_rate": 9.998188511091738e-06, |
|
"loss": 8.2344, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.01976169718105202, |
|
"eval_accuracy": 0.08193849629226983, |
|
"eval_loss": 8.1875, |
|
"eval_runtime": 240.2982, |
|
"eval_samples_per_second": 140.521, |
|
"eval_steps_per_second": 2.197, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.01985856824566502, |
|
"grad_norm": 1.120973825454712, |
|
"learning_rate": 9.998178823985276e-06, |
|
"loss": 8.2969, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.01985856824566502, |
|
"eval_accuracy": 0.0822807879458914, |
|
"eval_loss": 8.1796875, |
|
"eval_runtime": 239.9882, |
|
"eval_samples_per_second": 140.703, |
|
"eval_steps_per_second": 2.2, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.01995543931027802, |
|
"grad_norm": 1.1114741563796997, |
|
"learning_rate": 9.998169136878815e-06, |
|
"loss": 8.2812, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.01995543931027802, |
|
"eval_accuracy": 0.08245699982813046, |
|
"eval_loss": 8.171875, |
|
"eval_runtime": 237.9497, |
|
"eval_samples_per_second": 141.908, |
|
"eval_steps_per_second": 2.219, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.02005231037489102, |
|
"grad_norm": 1.1044974327087402, |
|
"learning_rate": 9.998159449772353e-06, |
|
"loss": 8.2578, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.02005231037489102, |
|
"eval_accuracy": 0.08243728563529247, |
|
"eval_loss": 8.1640625, |
|
"eval_runtime": 239.4885, |
|
"eval_samples_per_second": 140.996, |
|
"eval_steps_per_second": 2.205, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.02014918143950402, |
|
"grad_norm": 1.156922459602356, |
|
"learning_rate": 9.998149762665893e-06, |
|
"loss": 8.2031, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.02014918143950402, |
|
"eval_accuracy": 0.08235234959742663, |
|
"eval_loss": 8.1640625, |
|
"eval_runtime": 241.2039, |
|
"eval_samples_per_second": 139.994, |
|
"eval_steps_per_second": 2.189, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.02024605250411702, |
|
"grad_norm": 1.1297597885131836, |
|
"learning_rate": 9.998140075559431e-06, |
|
"loss": 8.1953, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.02024605250411702, |
|
"eval_accuracy": 0.08222390338065405, |
|
"eval_loss": 8.15625, |
|
"eval_runtime": 240.9498, |
|
"eval_samples_per_second": 140.141, |
|
"eval_steps_per_second": 2.191, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.02034292356873002, |
|
"grad_norm": 1.0749586820602417, |
|
"learning_rate": 9.998130388452971e-06, |
|
"loss": 8.2344, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.02034292356873002, |
|
"eval_accuracy": 0.08212646142310244, |
|
"eval_loss": 8.1484375, |
|
"eval_runtime": 240.1358, |
|
"eval_samples_per_second": 140.616, |
|
"eval_steps_per_second": 2.199, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.02043979463334302, |
|
"grad_norm": 1.1359708309173584, |
|
"learning_rate": 9.998120701346509e-06, |
|
"loss": 8.1484, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.02043979463334302, |
|
"eval_accuracy": 0.08224295174906432, |
|
"eval_loss": 8.140625, |
|
"eval_runtime": 239.69, |
|
"eval_samples_per_second": 140.878, |
|
"eval_steps_per_second": 2.203, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.020536665697956022, |
|
"grad_norm": 1.0708686113357544, |
|
"learning_rate": 9.998111014240047e-06, |
|
"loss": 8.2188, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.020536665697956022, |
|
"eval_accuracy": 0.08242179798012607, |
|
"eval_loss": 8.1328125, |
|
"eval_runtime": 239.7594, |
|
"eval_samples_per_second": 140.837, |
|
"eval_steps_per_second": 2.202, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.02063353676256902, |
|
"grad_norm": 1.1410462856292725, |
|
"learning_rate": 9.998101327133585e-06, |
|
"loss": 8.1406, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.02063353676256902, |
|
"eval_accuracy": 0.08258810934261389, |
|
"eval_loss": 8.1328125, |
|
"eval_runtime": 236.9832, |
|
"eval_samples_per_second": 142.487, |
|
"eval_steps_per_second": 2.228, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.02073040782718202, |
|
"grad_norm": 1.1414846181869507, |
|
"learning_rate": 9.998091640027125e-06, |
|
"loss": 8.1641, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.02073040782718202, |
|
"eval_accuracy": 0.08288260638188082, |
|
"eval_loss": 8.125, |
|
"eval_runtime": 238.8778, |
|
"eval_samples_per_second": 141.357, |
|
"eval_steps_per_second": 2.21, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.02082727889179502, |
|
"grad_norm": 1.1182080507278442, |
|
"learning_rate": 9.998081952920663e-06, |
|
"loss": 8.1328, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.02082727889179502, |
|
"eval_accuracy": 0.08311975567369982, |
|
"eval_loss": 8.1171875, |
|
"eval_runtime": 241.545, |
|
"eval_samples_per_second": 139.796, |
|
"eval_steps_per_second": 2.186, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.020924149956408022, |
|
"grad_norm": 1.0699214935302734, |
|
"learning_rate": 9.998072265814202e-06, |
|
"loss": 8.1875, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.020924149956408022, |
|
"eval_accuracy": 0.08333493275940426, |
|
"eval_loss": 8.109375, |
|
"eval_runtime": 240.2573, |
|
"eval_samples_per_second": 140.545, |
|
"eval_steps_per_second": 2.198, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.021021021021021023, |
|
"grad_norm": 1.0673527717590332, |
|
"learning_rate": 9.99806257870774e-06, |
|
"loss": 8.1719, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.021021021021021023, |
|
"eval_accuracy": 0.0834616709917753, |
|
"eval_loss": 8.1015625, |
|
"eval_runtime": 241.6637, |
|
"eval_samples_per_second": 139.727, |
|
"eval_steps_per_second": 2.185, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.02111789208563402, |
|
"grad_norm": 1.0935176610946655, |
|
"learning_rate": 9.99805289160128e-06, |
|
"loss": 8.125, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.02111789208563402, |
|
"eval_accuracy": 0.08352278209468422, |
|
"eval_loss": 8.1015625, |
|
"eval_runtime": 239.5275, |
|
"eval_samples_per_second": 140.973, |
|
"eval_steps_per_second": 2.204, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.02121476315024702, |
|
"grad_norm": 1.0975334644317627, |
|
"learning_rate": 9.998043204494818e-06, |
|
"loss": 8.1172, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.02121476315024702, |
|
"eval_accuracy": 0.08350500747735307, |
|
"eval_loss": 8.09375, |
|
"eval_runtime": 238.965, |
|
"eval_samples_per_second": 141.305, |
|
"eval_steps_per_second": 2.21, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.02131163421486002, |
|
"grad_norm": 1.051080346107483, |
|
"learning_rate": 9.998033517388356e-06, |
|
"loss": 8.1172, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.02131163421486002, |
|
"eval_accuracy": 0.0834144553551839, |
|
"eval_loss": 8.0859375, |
|
"eval_runtime": 238.1182, |
|
"eval_samples_per_second": 141.808, |
|
"eval_steps_per_second": 2.217, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.021408505279473022, |
|
"grad_norm": 1.077547550201416, |
|
"learning_rate": 9.998023830281896e-06, |
|
"loss": 8.1562, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.021408505279473022, |
|
"eval_accuracy": 0.08347733234027067, |
|
"eval_loss": 8.078125, |
|
"eval_runtime": 237.0397, |
|
"eval_samples_per_second": 142.453, |
|
"eval_steps_per_second": 2.227, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.021505376344086023, |
|
"grad_norm": 1.069908857345581, |
|
"learning_rate": 9.998014143175434e-06, |
|
"loss": 8.0781, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.021505376344086023, |
|
"eval_accuracy": 0.08379307786344815, |
|
"eval_loss": 8.078125, |
|
"eval_runtime": 239.359, |
|
"eval_samples_per_second": 141.073, |
|
"eval_steps_per_second": 2.206, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.02160224740869902, |
|
"grad_norm": 1.05584716796875, |
|
"learning_rate": 9.998004456068972e-06, |
|
"loss": 8.1094, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.02160224740869902, |
|
"eval_accuracy": 0.08401352364679797, |
|
"eval_loss": 8.0703125, |
|
"eval_runtime": 239.5111, |
|
"eval_samples_per_second": 140.983, |
|
"eval_steps_per_second": 2.204, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.02169911847331202, |
|
"grad_norm": 1.0520967245101929, |
|
"learning_rate": 9.997994768962512e-06, |
|
"loss": 8.0938, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.02169911847331202, |
|
"eval_accuracy": 0.08430202826621548, |
|
"eval_loss": 8.0625, |
|
"eval_runtime": 240.2811, |
|
"eval_samples_per_second": 140.531, |
|
"eval_steps_per_second": 2.197, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.021795989537925022, |
|
"grad_norm": 1.042554497718811, |
|
"learning_rate": 9.997985081856052e-06, |
|
"loss": 8.0938, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.021795989537925022, |
|
"eval_accuracy": 0.08457012391947913, |
|
"eval_loss": 8.0546875, |
|
"eval_runtime": 239.2715, |
|
"eval_samples_per_second": 141.124, |
|
"eval_steps_per_second": 2.207, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.021892860602538023, |
|
"grad_norm": 1.0677285194396973, |
|
"learning_rate": 9.99797539474959e-06, |
|
"loss": 8.1016, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.021892860602538023, |
|
"eval_accuracy": 0.08467908753451901, |
|
"eval_loss": 8.046875, |
|
"eval_runtime": 237.6539, |
|
"eval_samples_per_second": 142.085, |
|
"eval_steps_per_second": 2.222, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.02198973166715102, |
|
"grad_norm": 1.0176194906234741, |
|
"learning_rate": 9.997965707643128e-06, |
|
"loss": 8.1094, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.02198973166715102, |
|
"eval_accuracy": 0.08459319618334385, |
|
"eval_loss": 8.046875, |
|
"eval_runtime": 240.1907, |
|
"eval_samples_per_second": 140.584, |
|
"eval_steps_per_second": 2.198, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.02208660273176402, |
|
"grad_norm": 1.0204542875289917, |
|
"learning_rate": 9.997956020536667e-06, |
|
"loss": 8.1016, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.02208660273176402, |
|
"eval_accuracy": 0.08444465943818719, |
|
"eval_loss": 8.0390625, |
|
"eval_runtime": 239.7402, |
|
"eval_samples_per_second": 140.848, |
|
"eval_steps_per_second": 2.202, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.022183473796377022, |
|
"grad_norm": 1.0234394073486328, |
|
"learning_rate": 9.997946333430205e-06, |
|
"loss": 8.0859, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.022183473796377022, |
|
"eval_accuracy": 0.0843871379974103, |
|
"eval_loss": 8.03125, |
|
"eval_runtime": 238.5255, |
|
"eval_samples_per_second": 141.566, |
|
"eval_steps_per_second": 2.214, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.022280344860990023, |
|
"grad_norm": 1.0190110206604004, |
|
"learning_rate": 9.997936646323743e-06, |
|
"loss": 8.0859, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.022280344860990023, |
|
"eval_accuracy": 0.08446454732435414, |
|
"eval_loss": 8.03125, |
|
"eval_runtime": 239.3718, |
|
"eval_samples_per_second": 141.065, |
|
"eval_steps_per_second": 2.206, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.022377215925603024, |
|
"grad_norm": 1.0486043691635132, |
|
"learning_rate": 9.997926959217283e-06, |
|
"loss": 8.1094, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.022377215925603024, |
|
"eval_accuracy": 0.08493841167466973, |
|
"eval_loss": 8.0234375, |
|
"eval_runtime": 240.0196, |
|
"eval_samples_per_second": 140.684, |
|
"eval_steps_per_second": 2.2, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.02247408699021602, |
|
"grad_norm": 1.023290753364563, |
|
"learning_rate": 9.997917272110821e-06, |
|
"loss": 8.1016, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.02247408699021602, |
|
"eval_accuracy": 0.0852934987368587, |
|
"eval_loss": 8.015625, |
|
"eval_runtime": 238.7993, |
|
"eval_samples_per_second": 141.403, |
|
"eval_steps_per_second": 2.211, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.022570958054829022, |
|
"grad_norm": 1.0117347240447998, |
|
"learning_rate": 9.997907585004361e-06, |
|
"loss": 8.0859, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.022570958054829022, |
|
"eval_accuracy": 0.08559141174492868, |
|
"eval_loss": 8.0078125, |
|
"eval_runtime": 237.5708, |
|
"eval_samples_per_second": 142.134, |
|
"eval_steps_per_second": 2.222, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.022667829119442023, |
|
"grad_norm": 0.9845523238182068, |
|
"learning_rate": 9.997897897897899e-06, |
|
"loss": 8.0859, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.022667829119442023, |
|
"eval_accuracy": 0.08566416030087853, |
|
"eval_loss": 8.0078125, |
|
"eval_runtime": 238.2296, |
|
"eval_samples_per_second": 141.741, |
|
"eval_steps_per_second": 2.216, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.022764700184055024, |
|
"grad_norm": 0.9753006100654602, |
|
"learning_rate": 9.997888210791437e-06, |
|
"loss": 8.0781, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.022764700184055024, |
|
"eval_accuracy": 0.08569174859129644, |
|
"eval_loss": 8.0, |
|
"eval_runtime": 238.7612, |
|
"eval_samples_per_second": 141.426, |
|
"eval_steps_per_second": 2.211, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.022861571248668024, |
|
"grad_norm": 1.0460307598114014, |
|
"learning_rate": 9.997878523684977e-06, |
|
"loss": 8.0234, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.022861571248668024, |
|
"eval_accuracy": 0.08559063012494832, |
|
"eval_loss": 7.9921875, |
|
"eval_runtime": 241.525, |
|
"eval_samples_per_second": 139.807, |
|
"eval_steps_per_second": 2.186, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.02295844231328102, |
|
"grad_norm": 1.0023905038833618, |
|
"learning_rate": 9.997868836578515e-06, |
|
"loss": 8.0391, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.02295844231328102, |
|
"eval_accuracy": 0.08547078172795972, |
|
"eval_loss": 7.98828125, |
|
"eval_runtime": 240.1103, |
|
"eval_samples_per_second": 140.631, |
|
"eval_steps_per_second": 2.199, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.023055313377894023, |
|
"grad_norm": 1.004289984703064, |
|
"learning_rate": 9.997859149472053e-06, |
|
"loss": 8.0078, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.023055313377894023, |
|
"eval_accuracy": 0.08553345607082936, |
|
"eval_loss": 7.984375, |
|
"eval_runtime": 240.4054, |
|
"eval_samples_per_second": 140.459, |
|
"eval_steps_per_second": 2.196, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.023152184442507023, |
|
"grad_norm": 0.9898872971534729, |
|
"learning_rate": 9.997849462365592e-06, |
|
"loss": 8.0078, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.023152184442507023, |
|
"eval_accuracy": 0.08570740993979181, |
|
"eval_loss": 7.9765625, |
|
"eval_runtime": 240.3894, |
|
"eval_samples_per_second": 140.468, |
|
"eval_steps_per_second": 2.196, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.023249055507120024, |
|
"grad_norm": 1.0223126411437988, |
|
"learning_rate": 9.99783977525913e-06, |
|
"loss": 7.9883, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.023249055507120024, |
|
"eval_accuracy": 0.08616876837042163, |
|
"eval_loss": 7.97265625, |
|
"eval_runtime": 237.9077, |
|
"eval_samples_per_second": 141.933, |
|
"eval_steps_per_second": 2.219, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.023345926571733025, |
|
"grad_norm": 0.9876830577850342, |
|
"learning_rate": 9.997830088152668e-06, |
|
"loss": 7.9805, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.023345926571733025, |
|
"eval_accuracy": 0.08653523234565806, |
|
"eval_loss": 7.96484375, |
|
"eval_runtime": 236.1897, |
|
"eval_samples_per_second": 142.966, |
|
"eval_steps_per_second": 2.235, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.023442797636346022, |
|
"grad_norm": 0.9565869569778442, |
|
"learning_rate": 9.997820401046208e-06, |
|
"loss": 8.0234, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.023442797636346022, |
|
"eval_accuracy": 0.08681360485421905, |
|
"eval_loss": 7.9609375, |
|
"eval_runtime": 239.1608, |
|
"eval_samples_per_second": 141.19, |
|
"eval_steps_per_second": 2.208, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.023539668700959023, |
|
"grad_norm": 0.9919081926345825, |
|
"learning_rate": 9.997810713939748e-06, |
|
"loss": 7.9961, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.023539668700959023, |
|
"eval_accuracy": 0.0869957512585312, |
|
"eval_loss": 7.95703125, |
|
"eval_runtime": 238.8915, |
|
"eval_samples_per_second": 141.349, |
|
"eval_steps_per_second": 2.21, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.023636539765572024, |
|
"grad_norm": 0.9676252007484436, |
|
"learning_rate": 9.997801026833286e-06, |
|
"loss": 8.0156, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.023636539765572024, |
|
"eval_accuracy": 0.08696109943940189, |
|
"eval_loss": 7.94921875, |
|
"eval_runtime": 239.0437, |
|
"eval_samples_per_second": 141.259, |
|
"eval_steps_per_second": 2.209, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.023733410830185025, |
|
"grad_norm": 0.9586087465286255, |
|
"learning_rate": 9.997791339726824e-06, |
|
"loss": 7.9766, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.023733410830185025, |
|
"eval_accuracy": 0.08687515019045039, |
|
"eval_loss": 7.9453125, |
|
"eval_runtime": 241.5167, |
|
"eval_samples_per_second": 139.812, |
|
"eval_steps_per_second": 2.186, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.023830281894798022, |
|
"grad_norm": 0.9658289551734924, |
|
"learning_rate": 9.997781652620364e-06, |
|
"loss": 7.9297, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.023830281894798022, |
|
"eval_accuracy": 0.08664124317410547, |
|
"eval_loss": 7.94140625, |
|
"eval_runtime": 239.9096, |
|
"eval_samples_per_second": 140.749, |
|
"eval_steps_per_second": 2.201, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.023927152959411023, |
|
"grad_norm": 0.9587231874465942, |
|
"learning_rate": 9.997771965513902e-06, |
|
"loss": 7.9336, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.023927152959411023, |
|
"eval_accuracy": 0.08649270642894882, |
|
"eval_loss": 7.9375, |
|
"eval_runtime": 238.655, |
|
"eval_samples_per_second": 141.489, |
|
"eval_steps_per_second": 2.212, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.024024024024024024, |
|
"grad_norm": 0.9744223356246948, |
|
"learning_rate": 9.99776227840744e-06, |
|
"loss": 7.9219, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.024024024024024024, |
|
"eval_accuracy": 0.08659249324644151, |
|
"eval_loss": 7.9296875, |
|
"eval_runtime": 238.0999, |
|
"eval_samples_per_second": 141.819, |
|
"eval_steps_per_second": 2.218, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.024120895088637025, |
|
"grad_norm": 0.9676837921142578, |
|
"learning_rate": 9.99775259130098e-06, |
|
"loss": 7.957, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.024120895088637025, |
|
"eval_accuracy": 0.08693261373345097, |
|
"eval_loss": 7.92578125, |
|
"eval_runtime": 239.9677, |
|
"eval_samples_per_second": 140.715, |
|
"eval_steps_per_second": 2.2, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.024217766153250025, |
|
"grad_norm": 0.9536520838737488, |
|
"learning_rate": 9.99774290419452e-06, |
|
"loss": 7.9453, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.024217766153250025, |
|
"eval_accuracy": 0.08742873977876275, |
|
"eval_loss": 7.91796875, |
|
"eval_runtime": 240.6934, |
|
"eval_samples_per_second": 140.291, |
|
"eval_steps_per_second": 2.194, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.024314637217863023, |
|
"grad_norm": 0.9051578044891357, |
|
"learning_rate": 9.997733217088057e-06, |
|
"loss": 7.9805, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.024314637217863023, |
|
"eval_accuracy": 0.08791403893990214, |
|
"eval_loss": 7.9140625, |
|
"eval_runtime": 241.0758, |
|
"eval_samples_per_second": 140.068, |
|
"eval_steps_per_second": 2.19, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.024411508282476024, |
|
"grad_norm": 0.9311773180961609, |
|
"learning_rate": 9.997723529981595e-06, |
|
"loss": 7.9531, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.024411508282476024, |
|
"eval_accuracy": 0.08829436943256792, |
|
"eval_loss": 7.91015625, |
|
"eval_runtime": 239.1867, |
|
"eval_samples_per_second": 141.174, |
|
"eval_steps_per_second": 2.207, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.024508379347089024, |
|
"grad_norm": 1.0077519416809082, |
|
"learning_rate": 9.997713842875133e-06, |
|
"loss": 7.9102, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.024508379347089024, |
|
"eval_accuracy": 0.08852494732677427, |
|
"eval_loss": 7.90625, |
|
"eval_runtime": 240.0697, |
|
"eval_samples_per_second": 140.655, |
|
"eval_steps_per_second": 2.199, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.024605250411702025, |
|
"grad_norm": 0.8876093029975891, |
|
"learning_rate": 9.997704155768673e-06, |
|
"loss": 7.9844, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.024605250411702025, |
|
"eval_accuracy": 0.08859361408949334, |
|
"eval_loss": 7.8984375, |
|
"eval_runtime": 239.2375, |
|
"eval_samples_per_second": 141.144, |
|
"eval_steps_per_second": 2.207, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.024702121476315026, |
|
"grad_norm": 0.9206761717796326, |
|
"learning_rate": 9.997694468662211e-06, |
|
"loss": 7.9414, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.024702121476315026, |
|
"eval_accuracy": 0.08849816960522489, |
|
"eval_loss": 7.89453125, |
|
"eval_runtime": 240.2162, |
|
"eval_samples_per_second": 140.569, |
|
"eval_steps_per_second": 2.198, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.024798992540928023, |
|
"grad_norm": 0.8806933164596558, |
|
"learning_rate": 9.997684781555749e-06, |
|
"loss": 7.9453, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.024798992540928023, |
|
"eval_accuracy": 0.088308004358892, |
|
"eval_loss": 7.890625, |
|
"eval_runtime": 241.0125, |
|
"eval_samples_per_second": 140.105, |
|
"eval_steps_per_second": 2.191, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.024895863605541024, |
|
"grad_norm": 0.9106225371360779, |
|
"learning_rate": 9.997675094449289e-06, |
|
"loss": 7.9219, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.024895863605541024, |
|
"eval_accuracy": 0.08826322042890615, |
|
"eval_loss": 7.88671875, |
|
"eval_runtime": 242.9806, |
|
"eval_samples_per_second": 138.97, |
|
"eval_steps_per_second": 2.173, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.024992734670154025, |
|
"grad_norm": 0.9404253959655762, |
|
"learning_rate": 9.997665407342827e-06, |
|
"loss": 7.9141, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.024992734670154025, |
|
"eval_accuracy": 0.08845868332177259, |
|
"eval_loss": 7.8828125, |
|
"eval_runtime": 239.9439, |
|
"eval_samples_per_second": 140.729, |
|
"eval_steps_per_second": 2.201, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.025089605734767026, |
|
"grad_norm": 0.943131685256958, |
|
"learning_rate": 9.997655720236367e-06, |
|
"loss": 7.9258, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.025089605734767026, |
|
"eval_accuracy": 0.08890710159939423, |
|
"eval_loss": 7.875, |
|
"eval_runtime": 240.2599, |
|
"eval_samples_per_second": 140.544, |
|
"eval_steps_per_second": 2.198, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.025186476799380027, |
|
"grad_norm": 0.8273116946220398, |
|
"learning_rate": 9.997646033129905e-06, |
|
"loss": 7.957, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.025186476799380027, |
|
"eval_accuracy": 0.08928497143656629, |
|
"eval_loss": 7.87109375, |
|
"eval_runtime": 239.6344, |
|
"eval_samples_per_second": 140.91, |
|
"eval_steps_per_second": 2.203, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.025283347863993024, |
|
"grad_norm": 0.8834772109985352, |
|
"learning_rate": 9.997636346023444e-06, |
|
"loss": 7.8984, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.025283347863993024, |
|
"eval_accuracy": 0.08963823471880107, |
|
"eval_loss": 7.8671875, |
|
"eval_runtime": 240.9633, |
|
"eval_samples_per_second": 140.133, |
|
"eval_steps_per_second": 2.191, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.025380218928606025, |
|
"grad_norm": 0.8725751638412476, |
|
"learning_rate": 9.997626658916982e-06, |
|
"loss": 7.8945, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.025380218928606025, |
|
"eval_accuracy": 0.08984756412909688, |
|
"eval_loss": 7.86328125, |
|
"eval_runtime": 239.3388, |
|
"eval_samples_per_second": 141.085, |
|
"eval_steps_per_second": 2.206, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.025477089993219026, |
|
"grad_norm": 0.8974217176437378, |
|
"learning_rate": 9.99761697181052e-06, |
|
"loss": 7.9141, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.025477089993219026, |
|
"eval_accuracy": 0.08987159170627092, |
|
"eval_loss": 7.859375, |
|
"eval_runtime": 239.8452, |
|
"eval_samples_per_second": 140.787, |
|
"eval_steps_per_second": 2.201, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.025573961057832027, |
|
"grad_norm": 0.8629518747329712, |
|
"learning_rate": 9.99760728470406e-06, |
|
"loss": 7.9453, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.025573961057832027, |
|
"eval_accuracy": 0.0898933323212802, |
|
"eval_loss": 7.85546875, |
|
"eval_runtime": 242.7767, |
|
"eval_samples_per_second": 139.087, |
|
"eval_steps_per_second": 2.175, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.025670832122445024, |
|
"grad_norm": 0.8912859559059143, |
|
"learning_rate": 9.997597597597598e-06, |
|
"loss": 7.8672, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.025670832122445024, |
|
"eval_accuracy": 0.08997965790577779, |
|
"eval_loss": 7.84765625, |
|
"eval_runtime": 242.1493, |
|
"eval_samples_per_second": 139.447, |
|
"eval_steps_per_second": 2.18, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.025767703187058025, |
|
"grad_norm": 0.8678880333900452, |
|
"learning_rate": 9.997587910491136e-06, |
|
"loss": 7.9375, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.025767703187058025, |
|
"eval_accuracy": 0.09024167429252752, |
|
"eval_loss": 7.84375, |
|
"eval_runtime": 241.9128, |
|
"eval_samples_per_second": 139.583, |
|
"eval_steps_per_second": 2.183, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.025864574251671026, |
|
"grad_norm": 0.8691763281822205, |
|
"learning_rate": 9.997578223384676e-06, |
|
"loss": 7.9219, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.025864574251671026, |
|
"eval_accuracy": 0.09048151583094556, |
|
"eval_loss": 7.83984375, |
|
"eval_runtime": 240.5035, |
|
"eval_samples_per_second": 140.401, |
|
"eval_steps_per_second": 2.195, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.025961445316284026, |
|
"grad_norm": 0.8926984071731567, |
|
"learning_rate": 9.997568536278216e-06, |
|
"loss": 7.8555, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.025961445316284026, |
|
"eval_accuracy": 0.09069718504774874, |
|
"eval_loss": 7.8359375, |
|
"eval_runtime": 239.1407, |
|
"eval_samples_per_second": 141.201, |
|
"eval_steps_per_second": 2.208, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.026058316380897027, |
|
"grad_norm": 0.8526946306228638, |
|
"learning_rate": 9.997558849171754e-06, |
|
"loss": 7.8984, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.026058316380897027, |
|
"eval_accuracy": 0.0907848133322136, |
|
"eval_loss": 7.83203125, |
|
"eval_runtime": 238.5966, |
|
"eval_samples_per_second": 141.523, |
|
"eval_steps_per_second": 2.213, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.026155187445510025, |
|
"grad_norm": 0.8387218117713928, |
|
"learning_rate": 9.997549162065292e-06, |
|
"loss": 7.8906, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.026155187445510025, |
|
"eval_accuracy": 0.09087632076769209, |
|
"eval_loss": 7.828125, |
|
"eval_runtime": 240.2401, |
|
"eval_samples_per_second": 140.555, |
|
"eval_steps_per_second": 2.198, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.026252058510123025, |
|
"grad_norm": 0.8393184542655945, |
|
"learning_rate": 9.997539474958831e-06, |
|
"loss": 7.8711, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.026252058510123025, |
|
"eval_accuracy": 0.09095222475245154, |
|
"eval_loss": 7.82421875, |
|
"eval_runtime": 239.8682, |
|
"eval_samples_per_second": 140.773, |
|
"eval_steps_per_second": 2.201, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.026348929574736026, |
|
"grad_norm": 0.8263446092605591, |
|
"learning_rate": 9.99752978785237e-06, |
|
"loss": 7.8633, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.026348929574736026, |
|
"eval_accuracy": 0.09090057993597143, |
|
"eval_loss": 7.8203125, |
|
"eval_runtime": 240.3911, |
|
"eval_samples_per_second": 140.467, |
|
"eval_steps_per_second": 2.196, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.026445800639349027, |
|
"grad_norm": 0.8394054770469666, |
|
"learning_rate": 9.997520100745907e-06, |
|
"loss": 7.8633, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.026445800639349027, |
|
"eval_accuracy": 0.09090938039797253, |
|
"eval_loss": 7.81640625, |
|
"eval_runtime": 238.99, |
|
"eval_samples_per_second": 141.29, |
|
"eval_steps_per_second": 2.209, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.026542671703962028, |
|
"grad_norm": 0.8645299077033997, |
|
"learning_rate": 9.997510413639445e-06, |
|
"loss": 7.8789, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.026542671703962028, |
|
"eval_accuracy": 0.09088737924296979, |
|
"eval_loss": 7.8125, |
|
"eval_runtime": 239.144, |
|
"eval_samples_per_second": 141.199, |
|
"eval_steps_per_second": 2.208, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.026639542768575025, |
|
"grad_norm": 0.832476019859314, |
|
"learning_rate": 9.997500726532985e-06, |
|
"loss": 7.8438, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.026639542768575025, |
|
"eval_accuracy": 0.09096122785666977, |
|
"eval_loss": 7.80859375, |
|
"eval_runtime": 238.0741, |
|
"eval_samples_per_second": 141.834, |
|
"eval_steps_per_second": 2.218, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.026736413833188026, |
|
"grad_norm": 0.8168690800666809, |
|
"learning_rate": 9.997491039426525e-06, |
|
"loss": 7.8789, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.026736413833188026, |
|
"eval_accuracy": 0.09111972301935398, |
|
"eval_loss": 7.8046875, |
|
"eval_runtime": 237.8823, |
|
"eval_samples_per_second": 141.948, |
|
"eval_steps_per_second": 2.22, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.026833284897801027, |
|
"grad_norm": 0.8217095732688904, |
|
"learning_rate": 9.997481352320063e-06, |
|
"loss": 7.8516, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.026833284897801027, |
|
"eval_accuracy": 0.09120419587500923, |
|
"eval_loss": 7.80078125, |
|
"eval_runtime": 238.7067, |
|
"eval_samples_per_second": 141.458, |
|
"eval_steps_per_second": 2.212, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.026930155962414028, |
|
"grad_norm": 0.8041301965713501, |
|
"learning_rate": 9.997471665213601e-06, |
|
"loss": 7.8711, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.026930155962414028, |
|
"eval_accuracy": 0.09132303106091219, |
|
"eval_loss": 7.796875, |
|
"eval_runtime": 239.1861, |
|
"eval_samples_per_second": 141.175, |
|
"eval_steps_per_second": 2.207, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.02702702702702703, |
|
"grad_norm": 0.8525278568267822, |
|
"learning_rate": 9.99746197810714e-06, |
|
"loss": 7.8008, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.02702702702702703, |
|
"eval_accuracy": 0.0915536379040067, |
|
"eval_loss": 7.79296875, |
|
"eval_runtime": 239.3931, |
|
"eval_samples_per_second": 141.053, |
|
"eval_steps_per_second": 2.206, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.027123898091640026, |
|
"grad_norm": 0.8100795745849609, |
|
"learning_rate": 9.997452291000679e-06, |
|
"loss": 7.8477, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.027123898091640026, |
|
"eval_accuracy": 0.09175946449883497, |
|
"eval_loss": 7.7890625, |
|
"eval_runtime": 239.2146, |
|
"eval_samples_per_second": 141.158, |
|
"eval_steps_per_second": 2.207, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.027220769156253027, |
|
"grad_norm": 0.8111329674720764, |
|
"learning_rate": 9.997442603894217e-06, |
|
"loss": 7.8086, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.027220769156253027, |
|
"eval_accuracy": 0.09185091403653714, |
|
"eval_loss": 7.78515625, |
|
"eval_runtime": 238.4234, |
|
"eval_samples_per_second": 141.626, |
|
"eval_steps_per_second": 2.215, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.027317640220866028, |
|
"grad_norm": 0.7890406847000122, |
|
"learning_rate": 9.997432916787756e-06, |
|
"loss": 7.8398, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.027317640220866028, |
|
"eval_accuracy": 0.09197820229778327, |
|
"eval_loss": 7.78125, |
|
"eval_runtime": 239.0276, |
|
"eval_samples_per_second": 141.268, |
|
"eval_steps_per_second": 2.209, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.02741451128547903, |
|
"grad_norm": 0.8093599677085876, |
|
"learning_rate": 9.997423229681295e-06, |
|
"loss": 7.8008, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.02741451128547903, |
|
"eval_accuracy": 0.09215345886671297, |
|
"eval_loss": 7.77734375, |
|
"eval_runtime": 236.4329, |
|
"eval_samples_per_second": 142.819, |
|
"eval_steps_per_second": 2.233, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.02751138235009203, |
|
"grad_norm": 0.7843554615974426, |
|
"learning_rate": 9.997413542574834e-06, |
|
"loss": 7.8281, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.02751138235009203, |
|
"eval_accuracy": 0.09223813436458536, |
|
"eval_loss": 7.7734375, |
|
"eval_runtime": 240.4286, |
|
"eval_samples_per_second": 140.445, |
|
"eval_steps_per_second": 2.196, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.027608253414705027, |
|
"grad_norm": 0.8685352206230164, |
|
"learning_rate": 9.997403855468372e-06, |
|
"loss": 7.7852, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.027608253414705027, |
|
"eval_accuracy": 0.09260729058642081, |
|
"eval_loss": 7.76953125, |
|
"eval_runtime": 240.1204, |
|
"eval_samples_per_second": 140.625, |
|
"eval_steps_per_second": 2.199, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.027705124479318027, |
|
"grad_norm": 0.810741662979126, |
|
"learning_rate": 9.997394168361912e-06, |
|
"loss": 7.793, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.027705124479318027, |
|
"eval_accuracy": 0.09294211342689672, |
|
"eval_loss": 7.765625, |
|
"eval_runtime": 240.2366, |
|
"eval_samples_per_second": 140.557, |
|
"eval_steps_per_second": 2.198, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.027801995543931028, |
|
"grad_norm": 0.7687424421310425, |
|
"learning_rate": 9.99738448125545e-06, |
|
"loss": 7.8086, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.027801995543931028, |
|
"eval_accuracy": 0.09305313241299606, |
|
"eval_loss": 7.76171875, |
|
"eval_runtime": 239.5713, |
|
"eval_samples_per_second": 140.948, |
|
"eval_steps_per_second": 2.204, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.02789886660854403, |
|
"grad_norm": 0.7878952026367188, |
|
"learning_rate": 9.997374794148988e-06, |
|
"loss": 7.7812, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.02789886660854403, |
|
"eval_accuracy": 0.09312049647574788, |
|
"eval_loss": 7.7578125, |
|
"eval_runtime": 240.4397, |
|
"eval_samples_per_second": 140.439, |
|
"eval_steps_per_second": 2.196, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.027995737673157026, |
|
"grad_norm": 0.7822087407112122, |
|
"learning_rate": 9.997365107042528e-06, |
|
"loss": 7.793, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.027995737673157026, |
|
"eval_accuracy": 0.09311163811597047, |
|
"eval_loss": 7.75390625, |
|
"eval_runtime": 239.2769, |
|
"eval_samples_per_second": 141.121, |
|
"eval_steps_per_second": 2.207, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.028092608737770027, |
|
"grad_norm": 0.8028809428215027, |
|
"learning_rate": 9.997355419936066e-06, |
|
"loss": 7.7539, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.028092608737770027, |
|
"eval_accuracy": 0.09305567991515429, |
|
"eval_loss": 7.75, |
|
"eval_runtime": 236.6986, |
|
"eval_samples_per_second": 142.658, |
|
"eval_steps_per_second": 2.231, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.028189479802383028, |
|
"grad_norm": 0.8122669458389282, |
|
"learning_rate": 9.997345732829604e-06, |
|
"loss": 7.75, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.028189479802383028, |
|
"eval_accuracy": 0.09302276502931466, |
|
"eval_loss": 7.74609375, |
|
"eval_runtime": 239.075, |
|
"eval_samples_per_second": 141.24, |
|
"eval_steps_per_second": 2.209, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.02828635086699603, |
|
"grad_norm": 0.7843779921531677, |
|
"learning_rate": 9.997336045723144e-06, |
|
"loss": 7.8164, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.02828635086699603, |
|
"eval_accuracy": 0.09302994435357871, |
|
"eval_loss": 7.7421875, |
|
"eval_runtime": 239.8333, |
|
"eval_samples_per_second": 140.794, |
|
"eval_steps_per_second": 2.202, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.02838322193160903, |
|
"grad_norm": 0.8030862808227539, |
|
"learning_rate": 9.997326358616683e-06, |
|
"loss": 7.7539, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.02838322193160903, |
|
"eval_accuracy": 0.09310758527162785, |
|
"eval_loss": 7.7421875, |
|
"eval_runtime": 238.9644, |
|
"eval_samples_per_second": 141.306, |
|
"eval_steps_per_second": 2.21, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.028480092996222027, |
|
"grad_norm": 0.7855885624885559, |
|
"learning_rate": 9.997316671510221e-06, |
|
"loss": 7.8086, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.028480092996222027, |
|
"eval_accuracy": 0.09324769789032951, |
|
"eval_loss": 7.73828125, |
|
"eval_runtime": 238.9575, |
|
"eval_samples_per_second": 141.31, |
|
"eval_steps_per_second": 2.21, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.028576964060835028, |
|
"grad_norm": 0.7736480832099915, |
|
"learning_rate": 9.99730698440376e-06, |
|
"loss": 7.793, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.028576964060835028, |
|
"eval_accuracy": 0.09355316655820965, |
|
"eval_loss": 7.734375, |
|
"eval_runtime": 239.2168, |
|
"eval_samples_per_second": 141.156, |
|
"eval_steps_per_second": 2.207, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.02867383512544803, |
|
"grad_norm": 0.7640628814697266, |
|
"learning_rate": 9.997297297297297e-06, |
|
"loss": 7.7695, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.02867383512544803, |
|
"eval_accuracy": 0.0937199121540199, |
|
"eval_loss": 7.73046875, |
|
"eval_runtime": 238.9578, |
|
"eval_samples_per_second": 141.309, |
|
"eval_steps_per_second": 2.21, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.02877070619006103, |
|
"grad_norm": 0.787857174873352, |
|
"learning_rate": 9.997287610190837e-06, |
|
"loss": 7.75, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.02877070619006103, |
|
"eval_accuracy": 0.09381628300270953, |
|
"eval_loss": 7.7265625, |
|
"eval_runtime": 235.9881, |
|
"eval_samples_per_second": 143.088, |
|
"eval_steps_per_second": 2.237, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.02886757725467403, |
|
"grad_norm": 0.7313410639762878, |
|
"learning_rate": 9.997277923084375e-06, |
|
"loss": 7.7891, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.02886757725467403, |
|
"eval_accuracy": 0.09376585403953219, |
|
"eval_loss": 7.72265625, |
|
"eval_runtime": 238.897, |
|
"eval_samples_per_second": 141.345, |
|
"eval_steps_per_second": 2.21, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.028964448319287028, |
|
"grad_norm": 0.7264304757118225, |
|
"learning_rate": 9.997268235977913e-06, |
|
"loss": 7.7773, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.028964448319287028, |
|
"eval_accuracy": 0.09357499401988342, |
|
"eval_loss": 7.71875, |
|
"eval_runtime": 239.7653, |
|
"eval_samples_per_second": 140.834, |
|
"eval_steps_per_second": 2.202, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.02906131938390003, |
|
"grad_norm": 0.8194934725761414, |
|
"learning_rate": 9.997258548871453e-06, |
|
"loss": 7.7227, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.02906131938390003, |
|
"eval_accuracy": 0.09352896528770664, |
|
"eval_loss": 7.71484375, |
|
"eval_runtime": 241.2504, |
|
"eval_samples_per_second": 139.967, |
|
"eval_steps_per_second": 2.189, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.02915819044851303, |
|
"grad_norm": 0.7791144251823425, |
|
"learning_rate": 9.997248861764993e-06, |
|
"loss": 7.7109, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.02915819044851303, |
|
"eval_accuracy": 0.09365309812014315, |
|
"eval_loss": 7.71484375, |
|
"eval_runtime": 240.0076, |
|
"eval_samples_per_second": 140.691, |
|
"eval_steps_per_second": 2.2, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.02925506151312603, |
|
"grad_norm": 0.8002768754959106, |
|
"learning_rate": 9.99723917465853e-06, |
|
"loss": 7.7148, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.02925506151312603, |
|
"eval_accuracy": 0.09385024004852296, |
|
"eval_loss": 7.7109375, |
|
"eval_runtime": 238.1093, |
|
"eval_samples_per_second": 141.813, |
|
"eval_steps_per_second": 2.217, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.02935193257773903, |
|
"grad_norm": 0.7393519282341003, |
|
"learning_rate": 9.997229487552069e-06, |
|
"loss": 7.7812, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.02935193257773903, |
|
"eval_accuracy": 0.09395888522579307, |
|
"eval_loss": 7.70703125, |
|
"eval_runtime": 237.8539, |
|
"eval_samples_per_second": 141.965, |
|
"eval_steps_per_second": 2.22, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.02944880364235203, |
|
"grad_norm": 0.7291700839996338, |
|
"learning_rate": 9.997219800445608e-06, |
|
"loss": 7.7109, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.02944880364235203, |
|
"eval_accuracy": 0.0940500742235018, |
|
"eval_loss": 7.703125, |
|
"eval_runtime": 237.0915, |
|
"eval_samples_per_second": 142.422, |
|
"eval_steps_per_second": 2.227, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.02954567470696503, |
|
"grad_norm": 0.7372825145721436, |
|
"learning_rate": 9.997210113339146e-06, |
|
"loss": 7.7539, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.02954567470696503, |
|
"eval_accuracy": 0.09420005841306653, |
|
"eval_loss": 7.69921875, |
|
"eval_runtime": 238.3146, |
|
"eval_samples_per_second": 141.691, |
|
"eval_steps_per_second": 2.216, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.02964254577157803, |
|
"grad_norm": 0.7321212887763977, |
|
"learning_rate": 9.997200426232684e-06, |
|
"loss": 7.7734, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.02964254577157803, |
|
"eval_accuracy": 0.09429781880838792, |
|
"eval_loss": 7.69921875, |
|
"eval_runtime": 241.1284, |
|
"eval_samples_per_second": 140.037, |
|
"eval_steps_per_second": 2.19, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.02973941683619103, |
|
"grad_norm": 0.733534574508667, |
|
"learning_rate": 9.997190739126224e-06, |
|
"loss": 7.6914, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.02973941683619103, |
|
"eval_accuracy": 0.09434127108951833, |
|
"eval_loss": 7.6953125, |
|
"eval_runtime": 238.2743, |
|
"eval_samples_per_second": 141.715, |
|
"eval_steps_per_second": 2.216, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.029836287900804028, |
|
"grad_norm": 0.7814671993255615, |
|
"learning_rate": 9.997181052019762e-06, |
|
"loss": 7.6445, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.029836287900804028, |
|
"eval_accuracy": 0.09439731613699899, |
|
"eval_loss": 7.69140625, |
|
"eval_runtime": 236.2657, |
|
"eval_samples_per_second": 142.92, |
|
"eval_steps_per_second": 2.235, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.02993315896541703, |
|
"grad_norm": 0.7276116609573364, |
|
"learning_rate": 9.997171364913302e-06, |
|
"loss": 7.6953, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.02993315896541703, |
|
"eval_accuracy": 0.09446468019975081, |
|
"eval_loss": 7.6875, |
|
"eval_runtime": 235.6433, |
|
"eval_samples_per_second": 143.297, |
|
"eval_steps_per_second": 2.241, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.03003003003003003, |
|
"grad_norm": 0.7116464376449585, |
|
"learning_rate": 9.99716167780684e-06, |
|
"loss": 7.75, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.03003003003003003, |
|
"eval_accuracy": 0.09461590919150648, |
|
"eval_loss": 7.68359375, |
|
"eval_runtime": 237.3533, |
|
"eval_samples_per_second": 142.265, |
|
"eval_steps_per_second": 2.225, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.03012690109464303, |
|
"grad_norm": 0.7537593245506287, |
|
"learning_rate": 9.99715199070038e-06, |
|
"loss": 7.7539, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.03012690109464303, |
|
"eval_accuracy": 0.09491029043522077, |
|
"eval_loss": 7.68359375, |
|
"eval_runtime": 236.2372, |
|
"eval_samples_per_second": 142.937, |
|
"eval_steps_per_second": 2.235, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.03022377215925603, |
|
"grad_norm": 0.7783864140510559, |
|
"learning_rate": 9.997142303593918e-06, |
|
"loss": 7.6953, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.03022377215925603, |
|
"eval_accuracy": 0.0951206620054904, |
|
"eval_loss": 7.6796875, |
|
"eval_runtime": 235.999, |
|
"eval_samples_per_second": 143.081, |
|
"eval_steps_per_second": 2.237, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.03032064322386903, |
|
"grad_norm": 0.7092785835266113, |
|
"learning_rate": 9.997132616487456e-06, |
|
"loss": 7.7188, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.03032064322386903, |
|
"eval_accuracy": 0.09514008670944675, |
|
"eval_loss": 7.67578125, |
|
"eval_runtime": 236.3642, |
|
"eval_samples_per_second": 142.86, |
|
"eval_steps_per_second": 2.234, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.03041751428848203, |
|
"grad_norm": 0.7478684782981873, |
|
"learning_rate": 9.997122929380995e-06, |
|
"loss": 7.6914, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.03041751428848203, |
|
"eval_accuracy": 0.09526714337958758, |
|
"eval_loss": 7.671875, |
|
"eval_runtime": 235.6142, |
|
"eval_samples_per_second": 143.315, |
|
"eval_steps_per_second": 2.241, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.03051438535309503, |
|
"grad_norm": 0.7141132354736328, |
|
"learning_rate": 9.997113242274533e-06, |
|
"loss": 7.7344, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.03051438535309503, |
|
"eval_accuracy": 0.09541362475368477, |
|
"eval_loss": 7.671875, |
|
"eval_runtime": 237.3782, |
|
"eval_samples_per_second": 142.25, |
|
"eval_steps_per_second": 2.224, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.03061125641770803, |
|
"grad_norm": 0.677810549736023, |
|
"learning_rate": 9.997103555168072e-06, |
|
"loss": 7.7383, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.03061125641770803, |
|
"eval_accuracy": 0.0953166749272319, |
|
"eval_loss": 7.66796875, |
|
"eval_runtime": 237.8956, |
|
"eval_samples_per_second": 141.94, |
|
"eval_steps_per_second": 2.219, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.030708127482321032, |
|
"grad_norm": 0.6948665976524353, |
|
"learning_rate": 9.99709386806161e-06, |
|
"loss": 7.6875, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.030708127482321032, |
|
"eval_accuracy": 0.09496401957164852, |
|
"eval_loss": 7.6640625, |
|
"eval_runtime": 236.596, |
|
"eval_samples_per_second": 142.72, |
|
"eval_steps_per_second": 2.232, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.03080499854693403, |
|
"grad_norm": 0.7147016525268555, |
|
"learning_rate": 9.99708418095515e-06, |
|
"loss": 7.6914, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.03080499854693403, |
|
"eval_accuracy": 0.09470200318489878, |
|
"eval_loss": 7.66015625, |
|
"eval_runtime": 235.4121, |
|
"eval_samples_per_second": 143.438, |
|
"eval_steps_per_second": 2.243, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.03090186961154703, |
|
"grad_norm": 0.7114992141723633, |
|
"learning_rate": 9.997074493848689e-06, |
|
"loss": 7.6758, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.03090186961154703, |
|
"eval_accuracy": 0.09453548918019383, |
|
"eval_loss": 7.66015625, |
|
"eval_runtime": 235.7073, |
|
"eval_samples_per_second": 143.258, |
|
"eval_steps_per_second": 2.24, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.03099874067616003, |
|
"grad_norm": 0.7744684219360352, |
|
"learning_rate": 9.997064806742227e-06, |
|
"loss": 7.6836, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.03099874067616003, |
|
"eval_accuracy": 0.0947446159482725, |
|
"eval_loss": 7.65625, |
|
"eval_runtime": 236.6536, |
|
"eval_samples_per_second": 142.685, |
|
"eval_steps_per_second": 2.231, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.031095611740773032, |
|
"grad_norm": 0.6939721703529358, |
|
"learning_rate": 9.997055119635765e-06, |
|
"loss": 7.6914, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.031095611740773032, |
|
"eval_accuracy": 0.09504519225405336, |
|
"eval_loss": 7.65234375, |
|
"eval_runtime": 236.3373, |
|
"eval_samples_per_second": 142.876, |
|
"eval_steps_per_second": 2.234, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.031192482805386033, |
|
"grad_norm": 0.7224457859992981, |
|
"learning_rate": 9.997045432529305e-06, |
|
"loss": 7.6719, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.031192482805386033, |
|
"eval_accuracy": 0.0953886418632014, |
|
"eval_loss": 7.65234375, |
|
"eval_runtime": 236.1013, |
|
"eval_samples_per_second": 143.019, |
|
"eval_steps_per_second": 2.236, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.03128935386999903, |
|
"grad_norm": 0.6798779368400574, |
|
"learning_rate": 9.997035745422843e-06, |
|
"loss": 7.6914, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.03128935386999903, |
|
"eval_accuracy": 0.09575362944514158, |
|
"eval_loss": 7.6484375, |
|
"eval_runtime": 236.3544, |
|
"eval_samples_per_second": 142.866, |
|
"eval_steps_per_second": 2.234, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.03138622493461203, |
|
"grad_norm": 0.7235939502716064, |
|
"learning_rate": 9.99702605831638e-06, |
|
"loss": 7.6094, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.03138622493461203, |
|
"eval_accuracy": 0.09607695957701737, |
|
"eval_loss": 7.64453125, |
|
"eval_runtime": 236.9857, |
|
"eval_samples_per_second": 142.485, |
|
"eval_steps_per_second": 2.228, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.03148309599922503, |
|
"grad_norm": 0.6780532002449036, |
|
"learning_rate": 9.99701637120992e-06, |
|
"loss": 7.7148, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.03148309599922503, |
|
"eval_accuracy": 0.09617480681900324, |
|
"eval_loss": 7.640625, |
|
"eval_runtime": 236.9965, |
|
"eval_samples_per_second": 142.479, |
|
"eval_steps_per_second": 2.228, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.03157996706383803, |
|
"grad_norm": 0.7239165306091309, |
|
"learning_rate": 9.99700668410346e-06, |
|
"loss": 7.6641, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.03157996706383803, |
|
"eval_accuracy": 0.09612828595572771, |
|
"eval_loss": 7.640625, |
|
"eval_runtime": 236.7231, |
|
"eval_samples_per_second": 142.643, |
|
"eval_steps_per_second": 2.23, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.03167683812845103, |
|
"grad_norm": 0.6952512860298157, |
|
"learning_rate": 9.996996996996998e-06, |
|
"loss": 7.6602, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.03167683812845103, |
|
"eval_accuracy": 0.09610260829192846, |
|
"eval_loss": 7.63671875, |
|
"eval_runtime": 237.3447, |
|
"eval_samples_per_second": 142.27, |
|
"eval_steps_per_second": 2.225, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.031773709193064034, |
|
"grad_norm": 0.6919338703155518, |
|
"learning_rate": 9.996987309890536e-06, |
|
"loss": 7.7031, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.031773709193064034, |
|
"eval_accuracy": 0.0962992001914332, |
|
"eval_loss": 7.6328125, |
|
"eval_runtime": 236.9539, |
|
"eval_samples_per_second": 142.505, |
|
"eval_steps_per_second": 2.228, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.031870580257677035, |
|
"grad_norm": 0.6624383926391602, |
|
"learning_rate": 9.996977622784076e-06, |
|
"loss": 7.6953, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.031870580257677035, |
|
"eval_accuracy": 0.09658234926654084, |
|
"eval_loss": 7.6328125, |
|
"eval_runtime": 237.5409, |
|
"eval_samples_per_second": 142.152, |
|
"eval_steps_per_second": 2.223, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.03196745132229003, |
|
"grad_norm": 0.6538208723068237, |
|
"learning_rate": 9.996967935677614e-06, |
|
"loss": 7.6445, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.03196745132229003, |
|
"eval_accuracy": 0.09676490095528725, |
|
"eval_loss": 7.62890625, |
|
"eval_runtime": 236.9649, |
|
"eval_samples_per_second": 142.498, |
|
"eval_steps_per_second": 2.228, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.03206432238690303, |
|
"grad_norm": 0.6456217765808105, |
|
"learning_rate": 9.996958248571152e-06, |
|
"loss": 7.6445, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.03206432238690303, |
|
"eval_accuracy": 0.09686503515943788, |
|
"eval_loss": 7.625, |
|
"eval_runtime": 236.8552, |
|
"eval_samples_per_second": 142.564, |
|
"eval_steps_per_second": 2.229, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.03216119345151603, |
|
"grad_norm": 0.6584651470184326, |
|
"learning_rate": 9.996948561464692e-06, |
|
"loss": 7.6445, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.03216119345151603, |
|
"eval_accuracy": 0.09685333980862064, |
|
"eval_loss": 7.625, |
|
"eval_runtime": 235.6971, |
|
"eval_samples_per_second": 143.264, |
|
"eval_steps_per_second": 2.24, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.03225806451612903, |
|
"grad_norm": 0.6445140242576599, |
|
"learning_rate": 9.99693887435823e-06, |
|
"loss": 7.668, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.03225806451612903, |
|
"eval_accuracy": 0.09675618733995064, |
|
"eval_loss": 7.62109375, |
|
"eval_runtime": 236.584, |
|
"eval_samples_per_second": 142.727, |
|
"eval_steps_per_second": 2.232, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.03235493558074203, |
|
"grad_norm": 0.6661810874938965, |
|
"learning_rate": 9.996929187251768e-06, |
|
"loss": 7.6523, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.03235493558074203, |
|
"eval_accuracy": 0.09667113550653215, |
|
"eval_loss": 7.6171875, |
|
"eval_runtime": 237.5698, |
|
"eval_samples_per_second": 142.135, |
|
"eval_steps_per_second": 2.223, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.03245180664535503, |
|
"grad_norm": 0.6913946270942688, |
|
"learning_rate": 9.996919500145308e-06, |
|
"loss": 7.6602, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.03245180664535503, |
|
"eval_accuracy": 0.0968231461182682, |
|
"eval_loss": 7.6171875, |
|
"eval_runtime": 236.2729, |
|
"eval_samples_per_second": 142.915, |
|
"eval_steps_per_second": 2.235, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.032548677709968034, |
|
"grad_norm": 0.6751811504364014, |
|
"learning_rate": 9.996909813038846e-06, |
|
"loss": 7.6328, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.032548677709968034, |
|
"eval_accuracy": 0.09719745524219639, |
|
"eval_loss": 7.61328125, |
|
"eval_runtime": 236.6177, |
|
"eval_samples_per_second": 142.707, |
|
"eval_steps_per_second": 2.231, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.032645548774581035, |
|
"grad_norm": 0.6774271130561829, |
|
"learning_rate": 9.996900125932385e-06, |
|
"loss": 7.6523, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.032645548774581035, |
|
"eval_accuracy": 0.09763750729113935, |
|
"eval_loss": 7.609375, |
|
"eval_runtime": 236.1388, |
|
"eval_samples_per_second": 142.996, |
|
"eval_steps_per_second": 2.236, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.032742419839194035, |
|
"grad_norm": 0.6921724677085876, |
|
"learning_rate": 9.996890438825923e-06, |
|
"loss": 7.6133, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.032742419839194035, |
|
"eval_accuracy": 0.09806679035368622, |
|
"eval_loss": 7.609375, |
|
"eval_runtime": 237.6892, |
|
"eval_samples_per_second": 142.064, |
|
"eval_steps_per_second": 2.221, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.032839290903807036, |
|
"grad_norm": 0.6743577122688293, |
|
"learning_rate": 9.996880751719461e-06, |
|
"loss": 7.6367, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.032839290903807036, |
|
"eval_accuracy": 0.09835668451973549, |
|
"eval_loss": 7.60546875, |
|
"eval_runtime": 236.3158, |
|
"eval_samples_per_second": 142.889, |
|
"eval_steps_per_second": 2.234, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.03293616196842003, |
|
"grad_norm": 0.7336851358413696, |
|
"learning_rate": 9.996871064613001e-06, |
|
"loss": 7.6641, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.03293616196842003, |
|
"eval_accuracy": 0.09847195899239458, |
|
"eval_loss": 7.6015625, |
|
"eval_runtime": 235.9339, |
|
"eval_samples_per_second": 143.121, |
|
"eval_steps_per_second": 2.238, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.03303303303303303, |
|
"grad_norm": 0.6606389880180359, |
|
"learning_rate": 9.99686137750654e-06, |
|
"loss": 7.6367, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.03303303303303303, |
|
"eval_accuracy": 0.09851552706907764, |
|
"eval_loss": 7.6015625, |
|
"eval_runtime": 236.0582, |
|
"eval_samples_per_second": 143.045, |
|
"eval_steps_per_second": 2.237, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.03312990409764603, |
|
"grad_norm": 0.6327222585678101, |
|
"learning_rate": 9.996851690400077e-06, |
|
"loss": 7.6133, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.03312990409764603, |
|
"eval_accuracy": 0.09849986572058227, |
|
"eval_loss": 7.59765625, |
|
"eval_runtime": 239.9306, |
|
"eval_samples_per_second": 140.737, |
|
"eval_steps_per_second": 2.201, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.03322677516225903, |
|
"grad_norm": 0.6791363954544067, |
|
"learning_rate": 9.996842003293617e-06, |
|
"loss": 7.6016, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.03322677516225903, |
|
"eval_accuracy": 0.09839912358978024, |
|
"eval_loss": 7.59765625, |
|
"eval_runtime": 238.699, |
|
"eval_samples_per_second": 141.463, |
|
"eval_steps_per_second": 2.212, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.033323646226872033, |
|
"grad_norm": 0.6186023354530334, |
|
"learning_rate": 9.996832316187157e-06, |
|
"loss": 7.668, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.033323646226872033, |
|
"eval_accuracy": 0.09836293747957837, |
|
"eval_loss": 7.59375, |
|
"eval_runtime": 239.8775, |
|
"eval_samples_per_second": 140.768, |
|
"eval_steps_per_second": 2.201, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.033420517291485034, |
|
"grad_norm": 0.6474988460540771, |
|
"learning_rate": 9.996822629080695e-06, |
|
"loss": 7.6172, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.033420517291485034, |
|
"eval_accuracy": 0.09842074840923688, |
|
"eval_loss": 7.58984375, |
|
"eval_runtime": 237.6838, |
|
"eval_samples_per_second": 142.067, |
|
"eval_steps_per_second": 2.221, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.033517388356098035, |
|
"grad_norm": 0.6356053352355957, |
|
"learning_rate": 9.996812941974233e-06, |
|
"loss": 7.6016, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.033517388356098035, |
|
"eval_accuracy": 0.09849783929841095, |
|
"eval_loss": 7.58984375, |
|
"eval_runtime": 236.5606, |
|
"eval_samples_per_second": 142.741, |
|
"eval_steps_per_second": 2.232, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.033614259420711036, |
|
"grad_norm": 0.6287916898727417, |
|
"learning_rate": 9.996803254867772e-06, |
|
"loss": 7.6328, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.033614259420711036, |
|
"eval_accuracy": 0.0984772855878163, |
|
"eval_loss": 7.5859375, |
|
"eval_runtime": 235.9208, |
|
"eval_samples_per_second": 143.129, |
|
"eval_steps_per_second": 2.238, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.03371113048532404, |
|
"grad_norm": 0.602911114692688, |
|
"learning_rate": 9.99679356776131e-06, |
|
"loss": 7.668, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.03371113048532404, |
|
"eval_accuracy": 0.09856062943683325, |
|
"eval_loss": 7.58203125, |
|
"eval_runtime": 235.2916, |
|
"eval_samples_per_second": 143.511, |
|
"eval_steps_per_second": 2.244, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.03380800154993703, |
|
"grad_norm": 0.6358947157859802, |
|
"learning_rate": 9.996783880654848e-06, |
|
"loss": 7.6719, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.03380800154993703, |
|
"eval_accuracy": 0.09871625865958947, |
|
"eval_loss": 7.58203125, |
|
"eval_runtime": 237.9978, |
|
"eval_samples_per_second": 141.879, |
|
"eval_steps_per_second": 2.219, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.03390487261455003, |
|
"grad_norm": 0.6221932768821716, |
|
"learning_rate": 9.996774193548388e-06, |
|
"loss": 7.6602, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.03390487261455003, |
|
"eval_accuracy": 0.09892819346981982, |
|
"eval_loss": 7.578125, |
|
"eval_runtime": 235.489, |
|
"eval_samples_per_second": 143.391, |
|
"eval_steps_per_second": 2.242, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.03400174367916303, |
|
"grad_norm": 0.6026197075843811, |
|
"learning_rate": 9.996764506441926e-06, |
|
"loss": 7.6641, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.03400174367916303, |
|
"eval_accuracy": 0.09917735655022585, |
|
"eval_loss": 7.57421875, |
|
"eval_runtime": 236.3887, |
|
"eval_samples_per_second": 142.845, |
|
"eval_steps_per_second": 2.234, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.03409861474377603, |
|
"grad_norm": 0.6385203003883362, |
|
"learning_rate": 9.996754819335466e-06, |
|
"loss": 7.6445, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.03409861474377603, |
|
"eval_accuracy": 0.09936850605875623, |
|
"eval_loss": 7.57421875, |
|
"eval_runtime": 236.1623, |
|
"eval_samples_per_second": 142.982, |
|
"eval_steps_per_second": 2.236, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.034195485808389034, |
|
"grad_norm": 0.6434333324432373, |
|
"learning_rate": 9.996745132229004e-06, |
|
"loss": 7.5781, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.034195485808389034, |
|
"eval_accuracy": 0.09951446635286651, |
|
"eval_loss": 7.5703125, |
|
"eval_runtime": 236.8614, |
|
"eval_samples_per_second": 142.56, |
|
"eval_steps_per_second": 2.229, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.034292356873002035, |
|
"grad_norm": 0.588945209980011, |
|
"learning_rate": 9.996735445122544e-06, |
|
"loss": 7.6523, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.034292356873002035, |
|
"eval_accuracy": 0.09960855023939139, |
|
"eval_loss": 7.5703125, |
|
"eval_runtime": 238.835, |
|
"eval_samples_per_second": 141.382, |
|
"eval_steps_per_second": 2.211, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.034389227937615036, |
|
"grad_norm": 0.6065830588340759, |
|
"learning_rate": 9.996725758016082e-06, |
|
"loss": 7.6562, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.034389227937615036, |
|
"eval_accuracy": 0.09964855760283058, |
|
"eval_loss": 7.56640625, |
|
"eval_runtime": 237.4072, |
|
"eval_samples_per_second": 142.232, |
|
"eval_steps_per_second": 2.224, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.034486099002228036, |
|
"grad_norm": 0.6374432444572449, |
|
"learning_rate": 9.99671607090962e-06, |
|
"loss": 7.5977, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.034486099002228036, |
|
"eval_accuracy": 0.09976296360884482, |
|
"eval_loss": 7.56640625, |
|
"eval_runtime": 237.2058, |
|
"eval_samples_per_second": 142.353, |
|
"eval_steps_per_second": 2.226, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.03458297006684104, |
|
"grad_norm": 0.6186094284057617, |
|
"learning_rate": 9.996706383803158e-06, |
|
"loss": 7.5977, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.03458297006684104, |
|
"eval_accuracy": 0.09979237567921691, |
|
"eval_loss": 7.5625, |
|
"eval_runtime": 237.4024, |
|
"eval_samples_per_second": 142.235, |
|
"eval_steps_per_second": 2.224, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.03467984113145404, |
|
"grad_norm": 0.6533841490745544, |
|
"learning_rate": 9.996696696696698e-06, |
|
"loss": 7.5508, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.03467984113145404, |
|
"eval_accuracy": 0.09973910972499975, |
|
"eval_loss": 7.5625, |
|
"eval_runtime": 236.1905, |
|
"eval_samples_per_second": 142.965, |
|
"eval_steps_per_second": 2.235, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.03477671219606703, |
|
"grad_norm": 0.6210293769836426, |
|
"learning_rate": 9.996687009590236e-06, |
|
"loss": 7.6172, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.03477671219606703, |
|
"eval_accuracy": 0.09971864286106957, |
|
"eval_loss": 7.55859375, |
|
"eval_runtime": 237.087, |
|
"eval_samples_per_second": 142.425, |
|
"eval_steps_per_second": 2.227, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.03487358326068003, |
|
"grad_norm": 0.6240441799163818, |
|
"learning_rate": 9.996677322483775e-06, |
|
"loss": 7.5469, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.03487358326068003, |
|
"eval_accuracy": 0.09969887077045526, |
|
"eval_loss": 7.5546875, |
|
"eval_runtime": 236.921, |
|
"eval_samples_per_second": 142.524, |
|
"eval_steps_per_second": 2.229, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.034970454325293034, |
|
"grad_norm": 0.593199610710144, |
|
"learning_rate": 9.996667635377313e-06, |
|
"loss": 7.6172, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.034970454325293034, |
|
"eval_accuracy": 0.09969548375054037, |
|
"eval_loss": 7.5546875, |
|
"eval_runtime": 237.3099, |
|
"eval_samples_per_second": 142.291, |
|
"eval_steps_per_second": 2.225, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.035067325389906034, |
|
"grad_norm": 0.6026429533958435, |
|
"learning_rate": 9.996657948270853e-06, |
|
"loss": 7.625, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.035067325389906034, |
|
"eval_accuracy": 0.0997558421823571, |
|
"eval_loss": 7.55078125, |
|
"eval_runtime": 236.3278, |
|
"eval_samples_per_second": 142.882, |
|
"eval_steps_per_second": 2.234, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.035164196454519035, |
|
"grad_norm": 0.5891857743263245, |
|
"learning_rate": 9.996648261164391e-06, |
|
"loss": 7.6289, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.035164196454519035, |
|
"eval_accuracy": 0.09990139719203311, |
|
"eval_loss": 7.55078125, |
|
"eval_runtime": 236.9824, |
|
"eval_samples_per_second": 142.487, |
|
"eval_steps_per_second": 2.228, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.035261067519132036, |
|
"grad_norm": 0.628919780254364, |
|
"learning_rate": 9.996638574057929e-06, |
|
"loss": 7.5234, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.035261067519132036, |
|
"eval_accuracy": 0.10019818119346481, |
|
"eval_loss": 7.546875, |
|
"eval_runtime": 238.3945, |
|
"eval_samples_per_second": 141.643, |
|
"eval_steps_per_second": 2.215, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.03535793858374504, |
|
"grad_norm": 0.6015390157699585, |
|
"learning_rate": 9.996628886951469e-06, |
|
"loss": 7.5703, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.03535793858374504, |
|
"eval_accuracy": 0.10060615787432482, |
|
"eval_loss": 7.54296875, |
|
"eval_runtime": 235.2415, |
|
"eval_samples_per_second": 143.542, |
|
"eval_steps_per_second": 2.245, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.03545480964835804, |
|
"grad_norm": 0.5987712740898132, |
|
"learning_rate": 9.996619199845007e-06, |
|
"loss": 7.5859, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.03545480964835804, |
|
"eval_accuracy": 0.1009971705067222, |
|
"eval_loss": 7.54296875, |
|
"eval_runtime": 237.4998, |
|
"eval_samples_per_second": 142.177, |
|
"eval_steps_per_second": 2.223, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.03555168071297104, |
|
"grad_norm": 0.6239945888519287, |
|
"learning_rate": 9.996609512738545e-06, |
|
"loss": 7.5469, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.03555168071297104, |
|
"eval_accuracy": 0.10135602092437215, |
|
"eval_loss": 7.5390625, |
|
"eval_runtime": 238.6262, |
|
"eval_samples_per_second": 141.506, |
|
"eval_steps_per_second": 2.213, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.03564855177758403, |
|
"grad_norm": 0.5930135250091553, |
|
"learning_rate": 9.996599825632085e-06, |
|
"loss": 7.5508, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.03564855177758403, |
|
"eval_accuracy": 0.10160049428489602, |
|
"eval_loss": 7.5390625, |
|
"eval_runtime": 241.151, |
|
"eval_samples_per_second": 140.024, |
|
"eval_steps_per_second": 2.189, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.03574542284219703, |
|
"grad_norm": 0.5912957191467285, |
|
"learning_rate": 9.996590138525624e-06, |
|
"loss": 7.6172, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.03574542284219703, |
|
"eval_accuracy": 0.10172242700183226, |
|
"eval_loss": 7.53515625, |
|
"eval_runtime": 236.1926, |
|
"eval_samples_per_second": 142.964, |
|
"eval_steps_per_second": 2.235, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.035842293906810034, |
|
"grad_norm": 0.5632056593894958, |
|
"learning_rate": 9.996580451419162e-06, |
|
"loss": 7.6172, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.035842293906810034, |
|
"eval_accuracy": 0.1017233823151416, |
|
"eval_loss": 7.53515625, |
|
"eval_runtime": 236.4678, |
|
"eval_samples_per_second": 142.797, |
|
"eval_steps_per_second": 2.233, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.035939164971423035, |
|
"grad_norm": 0.5804527401924133, |
|
"learning_rate": 9.9965707643127e-06, |
|
"loss": 7.5352, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.035939164971423035, |
|
"eval_accuracy": 0.10176124746085684, |
|
"eval_loss": 7.53125, |
|
"eval_runtime": 236.525, |
|
"eval_samples_per_second": 142.763, |
|
"eval_steps_per_second": 2.232, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.036036036036036036, |
|
"grad_norm": 0.5636327862739563, |
|
"learning_rate": 9.99656107720624e-06, |
|
"loss": 7.5859, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.036036036036036036, |
|
"eval_accuracy": 0.10177870364041822, |
|
"eval_loss": 7.53125, |
|
"eval_runtime": 235.9325, |
|
"eval_samples_per_second": 143.121, |
|
"eval_steps_per_second": 2.238, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.03613290710064904, |
|
"grad_norm": 0.5637556910514832, |
|
"learning_rate": 9.996551390099778e-06, |
|
"loss": 7.5586, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.03613290710064904, |
|
"eval_accuracy": 0.10174599139679573, |
|
"eval_loss": 7.52734375, |
|
"eval_runtime": 237.5048, |
|
"eval_samples_per_second": 142.174, |
|
"eval_steps_per_second": 2.223, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.03622977816526204, |
|
"grad_norm": 0.5569087862968445, |
|
"learning_rate": 9.996541702993316e-06, |
|
"loss": 7.6406, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.03622977816526204, |
|
"eval_accuracy": 0.10170641826667895, |
|
"eval_loss": 7.52734375, |
|
"eval_runtime": 238.2389, |
|
"eval_samples_per_second": 141.736, |
|
"eval_steps_per_second": 2.216, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.03632664922987504, |
|
"grad_norm": 0.5960782766342163, |
|
"learning_rate": 9.996532015886856e-06, |
|
"loss": 7.5273, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.03632664922987504, |
|
"eval_accuracy": 0.10178799623351806, |
|
"eval_loss": 7.5234375, |
|
"eval_runtime": 236.2956, |
|
"eval_samples_per_second": 142.902, |
|
"eval_steps_per_second": 2.234, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.03642352029448804, |
|
"grad_norm": 0.5876879096031189, |
|
"learning_rate": 9.996522328780394e-06, |
|
"loss": 7.5312, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.03642352029448804, |
|
"eval_accuracy": 0.10199868624155746, |
|
"eval_loss": 7.51953125, |
|
"eval_runtime": 236.7211, |
|
"eval_samples_per_second": 142.645, |
|
"eval_steps_per_second": 2.23, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.03652039135910104, |
|
"grad_norm": 0.5734599828720093, |
|
"learning_rate": 9.996512641673934e-06, |
|
"loss": 7.5898, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.03652039135910104, |
|
"eval_accuracy": 0.10233654871529031, |
|
"eval_loss": 7.51953125, |
|
"eval_runtime": 237.8605, |
|
"eval_samples_per_second": 141.961, |
|
"eval_steps_per_second": 2.22, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.036617262423714034, |
|
"grad_norm": 0.6157567501068115, |
|
"learning_rate": 9.996502954567472e-06, |
|
"loss": 7.5898, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.036617262423714034, |
|
"eval_accuracy": 0.10266948987803573, |
|
"eval_loss": 7.515625, |
|
"eval_runtime": 236.7976, |
|
"eval_samples_per_second": 142.599, |
|
"eval_steps_per_second": 2.23, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.036714133488327035, |
|
"grad_norm": 0.5708773136138916, |
|
"learning_rate": 9.99649326746101e-06, |
|
"loss": 7.543, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.036714133488327035, |
|
"eval_accuracy": 0.10289378586351103, |
|
"eval_loss": 7.515625, |
|
"eval_runtime": 236.0204, |
|
"eval_samples_per_second": 143.068, |
|
"eval_steps_per_second": 2.237, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.036811004552940035, |
|
"grad_norm": 0.5848063230514526, |
|
"learning_rate": 9.99648358035455e-06, |
|
"loss": 7.5156, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.036811004552940035, |
|
"eval_accuracy": 0.10301302633384825, |
|
"eval_loss": 7.51171875, |
|
"eval_runtime": 237.3865, |
|
"eval_samples_per_second": 142.245, |
|
"eval_steps_per_second": 2.224, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.036907875617553036, |
|
"grad_norm": 0.5812904238700867, |
|
"learning_rate": 9.996473893248087e-06, |
|
"loss": 7.5664, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.036907875617553036, |
|
"eval_accuracy": 0.1031038968937872, |
|
"eval_loss": 7.51171875, |
|
"eval_runtime": 236.029, |
|
"eval_samples_per_second": 143.063, |
|
"eval_steps_per_second": 2.237, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.03700474668216604, |
|
"grad_norm": 0.5508126020431519, |
|
"learning_rate": 9.996464206141625e-06, |
|
"loss": 7.5625, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.03700474668216604, |
|
"eval_accuracy": 0.10313258524195525, |
|
"eval_loss": 7.5078125, |
|
"eval_runtime": 239.8272, |
|
"eval_samples_per_second": 140.797, |
|
"eval_steps_per_second": 2.202, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.03710161774677904, |
|
"grad_norm": 0.5484282374382019, |
|
"learning_rate": 9.996454519035165e-06, |
|
"loss": 7.5312, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.03710161774677904, |
|
"eval_accuracy": 0.10315090988816147, |
|
"eval_loss": 7.5078125, |
|
"eval_runtime": 237.933, |
|
"eval_samples_per_second": 141.918, |
|
"eval_steps_per_second": 2.219, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.03719848881139204, |
|
"grad_norm": 0.5489575266838074, |
|
"learning_rate": 9.996444831928703e-06, |
|
"loss": 7.625, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.03719848881139204, |
|
"eval_accuracy": 0.10320996562001093, |
|
"eval_loss": 7.5078125, |
|
"eval_runtime": 238.4765, |
|
"eval_samples_per_second": 141.595, |
|
"eval_steps_per_second": 2.214, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.03729535987600504, |
|
"grad_norm": 0.5515839457511902, |
|
"learning_rate": 9.996435144822241e-06, |
|
"loss": 7.5898, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.03729535987600504, |
|
"eval_accuracy": 0.1033970622841987, |
|
"eval_loss": 7.50390625, |
|
"eval_runtime": 237.4396, |
|
"eval_samples_per_second": 142.213, |
|
"eval_steps_per_second": 2.224, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.03739223094061804, |
|
"grad_norm": 0.5489226579666138, |
|
"learning_rate": 9.996425457715781e-06, |
|
"loss": 7.5625, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.03739223094061804, |
|
"eval_accuracy": 0.10353740649400565, |
|
"eval_loss": 7.5, |
|
"eval_runtime": 235.8228, |
|
"eval_samples_per_second": 143.188, |
|
"eval_steps_per_second": 2.239, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.037489102005231034, |
|
"grad_norm": 0.5816370248794556, |
|
"learning_rate": 9.99641577060932e-06, |
|
"loss": 7.5664, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.037489102005231034, |
|
"eval_accuracy": 0.10371405260956713, |
|
"eval_loss": 7.5, |
|
"eval_runtime": 236.7279, |
|
"eval_samples_per_second": 142.641, |
|
"eval_steps_per_second": 2.23, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.037585973069844035, |
|
"grad_norm": 0.6061241030693054, |
|
"learning_rate": 9.996406083502859e-06, |
|
"loss": 7.4609, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.037585973069844035, |
|
"eval_accuracy": 0.10385063346391309, |
|
"eval_loss": 7.49609375, |
|
"eval_runtime": 236.382, |
|
"eval_samples_per_second": 142.849, |
|
"eval_steps_per_second": 2.234, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.037682844134457036, |
|
"grad_norm": 0.5538076758384705, |
|
"learning_rate": 9.996396396396397e-06, |
|
"loss": 7.5469, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.037682844134457036, |
|
"eval_accuracy": 0.10396234722332831, |
|
"eval_loss": 7.49609375, |
|
"eval_runtime": 236.2463, |
|
"eval_samples_per_second": 142.931, |
|
"eval_steps_per_second": 2.235, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.03777971519907004, |
|
"grad_norm": 0.5397905707359314, |
|
"learning_rate": 9.996386709289936e-06, |
|
"loss": 7.5742, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.03777971519907004, |
|
"eval_accuracy": 0.10398232195615975, |
|
"eval_loss": 7.4921875, |
|
"eval_runtime": 236.8546, |
|
"eval_samples_per_second": 142.564, |
|
"eval_steps_per_second": 2.229, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.03787658626368304, |
|
"grad_norm": 0.5839679837226868, |
|
"learning_rate": 9.996377022183475e-06, |
|
"loss": 7.4375, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.03787658626368304, |
|
"eval_accuracy": 0.10396709484098679, |
|
"eval_loss": 7.4921875, |
|
"eval_runtime": 236.6285, |
|
"eval_samples_per_second": 142.7, |
|
"eval_steps_per_second": 2.231, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.03797345732829604, |
|
"grad_norm": 0.5725728869438171, |
|
"learning_rate": 9.996367335077013e-06, |
|
"loss": 7.4961, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.03797345732829604, |
|
"eval_accuracy": 0.10393614847954215, |
|
"eval_loss": 7.48828125, |
|
"eval_runtime": 237.6574, |
|
"eval_samples_per_second": 142.083, |
|
"eval_steps_per_second": 2.222, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.03797345732829604, |
|
"step": 392, |
|
"total_flos": 2185295466332160.0, |
|
"train_loss": 8.552704480229592, |
|
"train_runtime": 93913.2173, |
|
"train_samples_per_second": 703.461, |
|
"train_steps_per_second": 10.992 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1032300, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2185295466332160.0, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|