epoch = 3.0 train_loss = 0.39514872067704404 train_runtime = 2043.7626 train_samples_per_second = 404.549 train_steps_per_second = 3.16