| step,tag,value | |
| 5,train/loss,0.6123999953269958 | |
| 5,train/grad_norm,1.7033919095993042 | |
| 5,train/learning_rate,1.5999999959603883e-05 | |
| 5,train/epoch,1.7999999523162842 | |
| 5,train/epoch,1.7999999523162842 | |
| 9,train/epoch,3.0 | |
| 5,eval/loss,0.6563763618469238 | |
| 5,eval/runtime,6.1168999671936035 | |
| 5,eval/samples_per_second,1.6349999904632568 | |
| 5,eval/steps_per_second,0.49000000953674316 | |
| 9,train/train_runtime,56.083099365234375 | |
| 9,train/train_samples_per_second,0.5350000262260437 | |
| 9,train/train_steps_per_second,0.1599999964237213 | |
| 9,train/total_flos,1403824824647680.0 | |
| 9,train/train_loss,0.5981923341751099 | |