    r=64,                 # LoRA rank
    lora_alpha=128,       # LoRA alpha; for its exact effect, see how LoRA works
    lora_dropout=0.1,     # dropout ratio for the LoRA layers
)
args = TrainingArguments(
    output_dir="/home/jovyan/notebook/39newllama/results-8b",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    logging_steps=50,
    num_train_epochs=1,
    save_steps=100,
    learning_rate=1e-4,
    save_on_each_node=True,
    gradient_checkpointing=True,
    # eval_steps=50,
    weight_decay=1e-4,
    # load_best_model_at_end=True,
    # evaluation_strategy="steps"
)
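
With the LoRA config and the TrainingArguments defined, they are typically combined by wrapping the base model with peft's get_peft_model and passing both into a transformers Trainer. The following is a minimal sketch, assuming the LoraConfig above is assigned to a variable named config and that model, tokenizer, and tokenized_dataset have already been prepared (these names are assumptions, not part of the original code):

from peft import get_peft_model
from transformers import Trainer, DataCollatorForSeq2Seq

# Because gradient_checkpointing=True above, the inputs must require grads
# so that gradients can flow into the frozen base model's checkpointed layers.
model.enable_input_require_grads()

model = get_peft_model(model, config)   # attach the LoRA adapters to the base model
model.print_trainable_parameters()      # sanity check: only the LoRA weights should be trainable

trainer = Trainer(
    model=model,
    args=args,                          # the TrainingArguments defined above
    train_dataset=tokenized_dataset,    # assumed: an already tokenized training set
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True),
)
trainer.train()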