[
  {
    "loss": 0.6629,
    "grad_norm": 1.981173038482666,
    "learning_rate": 1.8575498575498575e-05,
    "epoch": 0.7122507122507122,
    "step": 500
  },
  {
    "eval_loss": 0.3868238925933838,
    "eval_accuracy": 0.8307254623044097,
    "eval_runtime": 3.5243,
    "eval_samples_per_second": 199.474,
    "eval_steps_per_second": 49.939,
    "epoch": 1.0,
    "step": 702
  },
  {
    "loss": 0.4271,
    "grad_norm": 0.7268210053443909,
    "learning_rate": 1.7150997150997152e-05,
    "epoch": 1.4245014245014245,
    "step": 1000
  },
  {
    "eval_loss": 0.12105754762887955,
    "eval_accuracy": 0.9544807965860598,
    "eval_runtime": 4.3029,
    "eval_samples_per_second": 163.38,
    "eval_steps_per_second": 40.903,
    "epoch": 2.0,
    "step": 1404
  },
  {
    "loss": 0.1895,
    "grad_norm": 0.8351957201957703,
    "learning_rate": 1.5726495726495726e-05,
    "epoch": 2.1367521367521367,
    "step": 1500
  },
  {
    "loss": 0.1534,
    "grad_norm": 0.24414250254631042,
    "learning_rate": 1.4301994301994305e-05,
    "epoch": 2.849002849002849,
    "step": 2000
  },
  {
    "eval_loss": 0.09802740812301636,
    "eval_accuracy": 0.9701280227596017,
    "eval_runtime": 3.5319,
    "eval_samples_per_second": 199.043,
    "eval_steps_per_second": 49.832,
    "epoch": 3.0,
    "step": 2106
  },
  {
    "loss": 0.1275,
    "grad_norm": 0.11179369688034058,
    "learning_rate": 1.2877492877492879e-05,
    "epoch": 3.561253561253561,
    "step": 2500
  },
  {
    "eval_loss": 0.093312107026577,
    "eval_accuracy": 0.9772403982930299,
    "eval_runtime": 3.7294,
    "eval_samples_per_second": 188.501,
    "eval_steps_per_second": 47.192,
    "epoch": 4.0,
    "step": 2808
  },
  {
    "loss": 0.1302,
    "grad_norm": 0.037690628319978714,
    "learning_rate": 1.1452991452991454e-05,
    "epoch": 4.273504273504273,
    "step": 3000
  },
  {
    "loss": 0.1332,
    "grad_norm": 0.06977783888578415,
    "learning_rate": 1.002849002849003e-05,
    "epoch": 4.985754985754986,
    "step": 3500
  },
  {
    "eval_loss": 0.09014463424682617,
    "eval_accuracy": 0.9786628733997155,
    "eval_runtime": 3.6975,
    "eval_samples_per_second": 190.126,
    "eval_steps_per_second": 47.599,
    "epoch": 5.0,
    "step": 3510
  },
  {
    "loss": 0.1051,
    "grad_norm": 0.07191012799739838,
    "learning_rate": 8.603988603988605e-06,
    "epoch": 5.698005698005698,
    "step": 4000
  },
  {
    "eval_loss": 0.08116021007299423,
    "eval_accuracy": 0.9800853485064012,
    "eval_runtime": 3.558,
    "eval_samples_per_second": 197.582,
    "eval_steps_per_second": 49.466,
    "epoch": 6.0,
    "step": 4212
  },
  {
    "loss": 0.1026,
    "grad_norm": 9.66889476776123,
    "learning_rate": 7.17948717948718e-06,
    "epoch": 6.410256410256411,
    "step": 4500
  },
  {
    "eval_loss": 0.07601884752511978,
    "eval_accuracy": 0.9800853485064012,
    "eval_runtime": 7.4677,
    "eval_samples_per_second": 94.138,
    "eval_steps_per_second": 23.568,
    "epoch": 7.0,
    "step": 4914
  },
  {
    "loss": 0.092,
    "grad_norm": 0.02262728288769722,
    "learning_rate": 5.7549857549857555e-06,
    "epoch": 7.122507122507122,
    "step": 5000
  },
  {
    "loss": 0.0864,
    "grad_norm": 0.1124846562743187,
    "learning_rate": 4.330484330484331e-06,
    "epoch": 7.834757834757835,
    "step": 5500
  },
  {
    "eval_loss": 0.07228563725948334,
    "eval_accuracy": 0.9800853485064012,
    "eval_runtime": 4.228,
    "eval_samples_per_second": 166.273,
    "eval_steps_per_second": 41.627,
    "epoch": 8.0,
    "step": 5616
  },
  {
    "loss": 0.1273,
    "grad_norm": 0.0037623795215040445,
    "learning_rate": 2.9059829059829063e-06,
    "epoch": 8.547008547008547,
    "step": 6000
  },
  {
    "eval_loss": 0.0683717355132103,
    "eval_accuracy": 0.9815078236130867,
    "eval_runtime": 3.5474,
    "eval_samples_per_second": 198.175,
    "eval_steps_per_second": 49.614,
    "epoch": 9.0,
    "step": 6318
  },
  {
    "loss": 0.0724,
    "grad_norm": 0.004779215436428785,
    "learning_rate": 1.4814814814814815e-06,
    "epoch": 9.25925925925926,
    "step": 6500
  },
  {
    "loss": 0.1116,
    "grad_norm": 2.0302906036376953,
    "learning_rate": 5.6980056980056986e-08,
    "epoch": 9.971509971509972,
    "step": 7000
  },
  {
    "eval_loss": 0.06746786087751389,
    "eval_accuracy": 0.9815078236130867,
    "eval_runtime": 7.6005,
    "eval_samples_per_second": 92.494,
    "eval_steps_per_second": 23.156,
    "epoch": 10.0,
    "step": 7020
  },
  {
    "train_runtime": 449.8629,
    "train_samples_per_second": 62.419,
    "train_steps_per_second": 15.605,
    "total_flos": 520823902075200.0,
    "train_loss": 0.17984749846308998,
    "epoch": 10.0,
    "step": 7020
  }
]
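
The array above has the shape of the "log_history" field written by a Hugging Face Trainer (training entries carry "loss", evaluation entries carry "eval_loss", and the final entry is the run summary). A minimal sketch for inspecting such a log is shown below; the file name "log_history.json" and the assumption that the array is stored on its own are hypothetical.

```python
import json

# Hypothetical path: the JSON array above saved as a standalone file.
with open("log_history.json") as f:
    history = json.load(f)

# Split the log into training and evaluation entries by their keys.
train_logs = [entry for entry in history if "loss" in entry]
eval_logs = [entry for entry in history if "eval_loss" in entry]

# Print one line per evaluation pass (epoch, step, loss, accuracy).
for entry in eval_logs:
    print(f"epoch {entry['epoch']:>4.1f}  step {entry['step']:>5}  "
          f"eval_loss {entry['eval_loss']:.4f}  "
          f"eval_accuracy {entry['eval_accuracy']:.4f}")
```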