[ { "loss": 0.6629, "grad_norm": 1.981173038482666, "learning_rate": 1.8575498575498575e-05, "epoch": 0.7122507122507122, "step": 500 }, { "eval_loss": 0.3868238925933838, "eval_accuracy": 0.8307254623044097, "eval_runtime": 3.5243, "eval_samples_per_second": 199.474, "eval_steps_per_second": 49.939, "epoch": 1.0, "step": 702 }, { "loss": 0.4271, "grad_norm": 0.7268210053443909, "learning_rate": 1.7150997150997152e-05, "epoch": 1.4245014245014245, "step": 1000 }, { "eval_loss": 0.12105754762887955, "eval_accuracy": 0.9544807965860598, "eval_runtime": 4.3029, "eval_samples_per_second": 163.38, "eval_steps_per_second": 40.903, "epoch": 2.0, "step": 1404 }, { "loss": 0.1895, "grad_norm": 0.8351957201957703, "learning_rate": 1.5726495726495726e-05, "epoch": 2.1367521367521367, "step": 1500 }, { "loss": 0.1534, "grad_norm": 0.24414250254631042, "learning_rate": 1.4301994301994305e-05, "epoch": 2.849002849002849, "step": 2000 }, { "eval_loss": 0.09802740812301636, "eval_accuracy": 0.9701280227596017, "eval_runtime": 3.5319, "eval_samples_per_second": 199.043, "eval_steps_per_second": 49.832, "epoch": 3.0, "step": 2106 }, { "loss": 0.1275, "grad_norm": 0.11179369688034058, "learning_rate": 1.2877492877492879e-05, "epoch": 3.561253561253561, "step": 2500 }, { "eval_loss": 0.093312107026577, "eval_accuracy": 0.9772403982930299, "eval_runtime": 3.7294, "eval_samples_per_second": 188.501, "eval_steps_per_second": 47.192, "epoch": 4.0, "step": 2808 }, { "loss": 0.1302, "grad_norm": 0.037690628319978714, "learning_rate": 1.1452991452991454e-05, "epoch": 4.273504273504273, "step": 3000 }, { "loss": 0.1332, "grad_norm": 0.06977783888578415, "learning_rate": 1.002849002849003e-05, "epoch": 4.985754985754986, "step": 3500 }, { "eval_loss": 0.09014463424682617, "eval_accuracy": 0.9786628733997155, "eval_runtime": 3.6975, "eval_samples_per_second": 190.126, "eval_steps_per_second": 47.599, "epoch": 5.0, "step": 3510 }, { "loss": 0.1051, "grad_norm": 0.07191012799739838, "learning_rate": 8.603988603988605e-06, "epoch": 5.698005698005698, "step": 4000 }, { "eval_loss": 0.08116021007299423, "eval_accuracy": 0.9800853485064012, "eval_runtime": 3.558, "eval_samples_per_second": 197.582, "eval_steps_per_second": 49.466, "epoch": 6.0, "step": 4212 }, { "loss": 0.1026, "grad_norm": 9.66889476776123, "learning_rate": 7.17948717948718e-06, "epoch": 6.410256410256411, "step": 4500 }, { "eval_loss": 0.07601884752511978, "eval_accuracy": 0.9800853485064012, "eval_runtime": 7.4677, "eval_samples_per_second": 94.138, "eval_steps_per_second": 23.568, "epoch": 7.0, "step": 4914 }, { "loss": 0.092, "grad_norm": 0.02262728288769722, "learning_rate": 5.7549857549857555e-06, "epoch": 7.122507122507122, "step": 5000 }, { "loss": 0.0864, "grad_norm": 0.1124846562743187, "learning_rate": 4.330484330484331e-06, "epoch": 7.834757834757835, "step": 5500 }, { "eval_loss": 0.07228563725948334, "eval_accuracy": 0.9800853485064012, "eval_runtime": 4.228, "eval_samples_per_second": 166.273, "eval_steps_per_second": 41.627, "epoch": 8.0, "step": 5616 }, { "loss": 0.1273, "grad_norm": 0.0037623795215040445, "learning_rate": 2.9059829059829063e-06, "epoch": 8.547008547008547, "step": 6000 }, { "eval_loss": 0.0683717355132103, "eval_accuracy": 0.9815078236130867, "eval_runtime": 3.5474, "eval_samples_per_second": 198.175, "eval_steps_per_second": 49.614, "epoch": 9.0, "step": 6318 }, { "loss": 0.0724, "grad_norm": 0.004779215436428785, "learning_rate": 1.4814814814814815e-06, "epoch": 9.25925925925926, "step": 6500 }, { "loss": 0.1116, "grad_norm": 2.0302906036376953, "learning_rate": 5.6980056980056986e-08, "epoch": 9.971509971509972, "step": 7000 }, { "eval_loss": 0.06746786087751389, "eval_accuracy": 0.9815078236130867, "eval_runtime": 7.6005, "eval_samples_per_second": 92.494, "eval_steps_per_second": 23.156, "epoch": 10.0, "step": 7020 }, { "train_runtime": 449.8629, "train_samples_per_second": 62.419, "train_steps_per_second": 15.605, "total_flos": 520823902075200.0, "train_loss": 0.17984749846308998, "epoch": 10.0, "step": 7020 } ]