,loss,learning_rate,epoch,step,eval_loss,eval_accuracy,eval_f1,eval_runtime,eval_samples_per_second,eval_steps_per_second,train_runtime,train_samples_per_second,train_steps_per_second,total_flos,train_loss 0,0.4794,2e-05,1.0,3750,,,,,,,,,,, 1,,,1.0,3750,0.41642841696739197,0.8346666666666667,0.8354288225130301,4.8505,618.487,19.379,,,,, 2,0.3653,2e-05,2.0,7500,,,,,,,,,,, 3,,,2.0,7500,0.4315239191055298,0.8363333333333334,0.8331066301394175,4.8532,618.146,19.369,,,,, 4,0.2753,2e-05,3.0,11250,,,,,,,,,,, 5,,,3.0,11250,0.4639851152896881,0.8326666666666667,0.8307922198552634,4.8504,618.504,19.38,,,,, 6,0.1993,2e-05,4.0,15000,,,,,,,,,,, 7,,,4.0,15000,0.5914837718009949,0.8223333333333334,0.8192726925927007,4.8531,618.167,19.369,,,,, 8,,,4.0,15000,,,,,,,2569.2881,4670.555,145.955,6.146684641240013e+16,0.3298242228190104