model_name: "google/gemma-2-2b-it" | |
new_model_name: "gemma-2-2b-ft" | |
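# A minimal sketch (not part of this config) of how these two names would
# typically be consumed, assuming a transformers-based training script;
# device_map is defined under "SFT Arguments" below:
#   from transformers import AutoModelForCausalLM, AutoTokenizer
#   model = AutoModelForCausalLM.from_pretrained(model_name, device_map=device_map)
#   tokenizer = AutoTokenizer.from_pretrained(model_name)
#   # the fine-tuned model/adapter is later saved under new_model_name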
# LoRA Parameters
lora_r: 64
lora_alpha: 16
lora_dropout: 0.1
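# These three values map onto a PEFT LoraConfig; a hedged sketch, assuming the
# script uses peft (bias and task_type below are illustrative, not from this file):
#   from peft import LoraConfig
#   peft_config = LoraConfig(
#       r=lora_r, lora_alpha=lora_alpha, lora_dropout=lora_dropout,
#       bias="none", task_type="CAUSAL_LM",
#   )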
# bitsandbytes parameters
use_4bit: True
bnb_4bit_compute_dtype: "float16"
bnb_4bit_quant_type: "nf4"
use_nested_quant: False
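# A sketch of the matching 4-bit quantization setup, assuming the script builds
# a transformers BitsAndBytesConfig from these keys and passes it to
# from_pretrained(..., quantization_config=bnb_config):
#   import torch
#   from transformers import BitsAndBytesConfig
#   bnb_config = BitsAndBytesConfig(
#       load_in_4bit=use_4bit,
#       bnb_4bit_compute_dtype=torch.float16,       # from bnb_4bit_compute_dtype
#       bnb_4bit_quant_type=bnb_4bit_quant_type,    # "nf4"
#       bnb_4bit_use_double_quant=use_nested_quant,
#   )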
# Training Arguments
num_train_epochs: 1
fp16: False
bf16: False
per_device_train_batch_size: 2
per_device_eval_batch_size: 2
gradient_accumulation_steps: 2
gradient_checkpointing: True
eval_strategy: "steps"
eval_steps: 0.2
max_grad_norm: 0.3
learning_rate: 2e-4
weight_decay: 0.001
optimizer: "paged_adamw_32bit"
lr_scheduler_type: "constant"
max_steps: -1
warmup_steps: 5
group_by_length: True
save_steps: 50
logging_steps: 50
logging_strategy: "steps"
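# These keys correspond to transformers TrainingArguments fields; a partial,
# hedged sketch (the "optimizer" key maps to the `optim` argument, and an
# eval_steps value below 1 is treated as a fraction of total training steps;
# output_dir=new_model_name is an assumption):
#   from transformers import TrainingArguments
#   training_args = TrainingArguments(
#       output_dir=new_model_name,
#       num_train_epochs=1, per_device_train_batch_size=2,
#       per_device_eval_batch_size=2, gradient_accumulation_steps=2,
#       gradient_checkpointing=True, eval_strategy="steps", eval_steps=0.2,
#       learning_rate=2e-4, weight_decay=0.001, max_grad_norm=0.3,
#       optim="paged_adamw_32bit", lr_scheduler_type="constant",
#       max_steps=-1, warmup_steps=5, group_by_length=True,
#       save_steps=50, logging_steps=50, logging_strategy="steps",
#   )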
# SFT Arguments
max_seq_length: 128
packing: True
device_map: "auto"
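# Finally, a hedged sketch of wiring everything into a TRL SFTTrainer
# (train_dataset is a placeholder, and the exact SFTTrainer keyword
# arguments vary by trl version):
#   from trl import SFTTrainer
#   trainer = SFTTrainer(
#       model=model, args=training_args, train_dataset=train_dataset,
#       peft_config=peft_config, max_seq_length=max_seq_length, packing=packing,
#   )
#   trainer.train()
#   trainer.save_model(new_model_name)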