from peft import LoraConfig

# LoRA configuration: adapt every attention and MLP projection matrix
# of the LLaMA-style model with low-rank updates.
peft_config = LoraConfig(
    r=8,                    # rank of the low-rank update matrices
    lora_alpha=16,          # scaling factor (alpha / r = 2)
    lora_dropout=0.05,      # dropout applied within the LoRA layers
    bias="none",            # leave bias parameters frozen
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
)
from transformers import TrainingArguments

# Training hyperparameters: effective train batch size of 16
# (4 per device x 4 gradient-accumulation steps).
training_args = TrainingArguments(
    output_dir="new_model/",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,    # trade extra compute for lower memory
    learning_rate=5e-5,
    lr_scheduler_type="cosine",
    warmup_steps=10,
    max_steps=100,
    optim="paged_adamw_32bit",      # paged AdamW from bitsandbytes
    fp16=True,                      # mixed-precision training
    save_strategy="no",             # no intermediate checkpoints
    save_steps=10,                  # ignored while save_strategy="no"
    logging_steps=1,
    evaluation_strategy="steps",
    eval_steps=20,
    report_to="wandb",              # log metrics to Weights & Biases
)
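
# For context, a minimal sketch of how these two configs plug together,
# assuming a LLaMA-style base checkpoint and pre-tokenized train/eval
# datasets (model_name, train_ds, and eval_ds are hypothetical
# placeholders, not part of the original snippet):
from peft import get_peft_model
from transformers import AutoModelForCausalLM, Trainer

model_name = "meta-llama/Llama-2-7b-hf"  # hypothetical base checkpoint
model = AutoModelForCausalLM.from_pretrained(model_name)

# Wrap the base model so only the LoRA adapter weights are trainable.
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()  # sanity check on trainable-param count

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,  # hypothetical tokenized datasets
    eval_dataset=eval_ds,
)
trainer.train()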