# NOTE(review): the original line began with the stray words "first 512",
# apparently the tail of a comment cut off before this statement. They are
# preserved below as a comment so the assignment parses; confirm the
# intended wording and restore the full comment.
# ... first 512

# Configuration object for the training run.
# NOTE(review): presumably transformers.TrainingArguments; confirm the import.
training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=5e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=5,
    weight_decay=0.01,
    # NOTE(review): newer transformers releases appear to rename this keyword
    # to `eval_strategy`; verify against the installed version before changing.
    evaluation_strategy="epoch",
    # NOTE(review): pushing to the Hub presumably requires an authenticated
    # Hugging Face session; confirm credentials are set up where this runs.
    push_to_hub=True,
)