---
# Dataset: cleaned Alpaca instruction-tuning data (HF hub: yahma/alpaca-cleaned).
dataset:
  name: alpaca_clean
  dataset_config:
    name: default
    path: yahma/alpaca-cleaned
    chunk_size: 1024      # tokens per packed sample
    concat_data: true     # pack multiple examples into each chunk
    cache_dir: "data/alpaca"
  # NOTE(review): nesting reconstructed — pretrained_model_config and
  # preprocess_config are assumed siblings of dataset_config under `dataset`;
  # confirm against the loader that consumes this config.
  pretrained_model_config:
    pretrained_model_name_or_path: "mistralai/Mistral-7B-v0.1"  # will be updated based on model_config
    cache_dir: "/scratch/"
  preprocess_config: null
# DataLoader settings (per-device).
dataloader:
  batch_size: 1
  num_workers: 2
  drop_last: false
  pin_memory: true
# Optimizer settings.
optimizer:
  optim: adamw_torch_fused
  # Written as 1.0e-4: YAML 1.1 resolvers (e.g. PyYAML) parse the dotless
  # "1e-4" as the STRING "1e-4", not a float; 1.0e-4 is a float under both
  # YAML 1.1 and 1.2.
  lr: 1.0e-4
  weight_decay: 0.0
# LR schedule: reduce on plateau of the monitored metric (mode: min).
lr_scheduler:
  lr_scheduler_type: reduce_lr_on_plateau
  mode: min
  factor: 0.1     # multiply LR by this on plateau
  patience: 10    # evaluations without improvement before reducing
  min_lr: 0.00001
trainer:  # HuggingFace Trainer-like arguments
  name: default_lm
  bf16: true
  train_split: train
  val_split: validation
  num_train_epochs: 2
  gradient_accumulation_steps: 8
  seed: 42
  batch_size: 1
  load_best_model_at_end: true
  greater_is_better: false              # lower eval loss is better
  metric_for_best_model: eval/loss      # eval/rouge/geometric_mean
  logging_steps: 100
  evaluation_strategy: steps
  max_steps: -1                         # -1: epoch count governs training length
  eval_steps: 100
  max_eval_batches: null                # null: evaluate on the full val split
# Finetuning method: LoRA adapters on the attention projections.
finetune:
  method: lora
  kwargs:
    r: 8
    lora_alpha: 16
    lora_dropout: 0.0   # written as a float (field is float-typed); 0.05
    target_modules: ["q_proj", "k_proj", "v_proj", "o_proj"]