# lolcats/configs/experiment/distill_alpaca_clean_xent1_mse1000_lr1e-2.yaml
# Source commit: ae81e0f — "chore: adding lolcats configs scrc and src" (ariG23498)
---
# Attention-distillation config: cross-entropy (xent) factor 1, MSE factor 1000,
# lr 1e-2, on the cleaned Alpaca dataset with Llama-3.1-8B as the teacher.
dataset:
  name: alpaca_clean
  dataset_config:
    name: default
    path: yahma/alpaca-cleaned
    chunk_size: 1024  # sequence length for distilling
    concat_data: true
    cache_dir: 'data/alpaca'  # Change this to where you want to save
  pretrained_model_config:  # will be updated based on model_config
    pretrained_model_name_or_path: 'meta-llama/Meta-Llama-3.1-8B'
    cache_dir: '/data_persistent2/sim_data/llama-3_1-8b/'
  preprocess_config: null

dataloader:
  batch_size: 1
  num_workers: 2
  drop_last: false
  pin_memory: true

optimizer:
  optim: adamw_torch_fused
  lr: 0.01
  weight_decay: 0.0

lr_scheduler:
  lr_scheduler_type: reduce_lr_on_plateau
  mode: min       # plateau detection minimizes the monitored metric
  factor: 0.1     # lr multiplier applied on plateau
  patience: 10
  min_lr: 0.00001

trainer:  # HuggingFace Trainer-like arguments
  name: distill_attention_xent_mse
  reverse_kl: false
  mse_factor: 1000  # weight on the attention-MSE distillation loss
  xent_factor: 1    # weight on the cross-entropy loss
  bf16: true
  train_split: train
  val_split: validation
  num_train_epochs: 2
  gradient_accumulation_steps: 8
  seed: 42
  batch_size: 1
  load_best_model_at_end: true
  greater_is_better: false  # lower distill loss is better
  metric_for_best_model: distill/eval/loss
  logging_steps: 100
  evaluation_strategy: steps
  max_steps: -1  # -1: run for num_train_epochs instead of a step cap
  eval_steps: 100
  max_eval_batches: null  # null: evaluate on the full validation split