dataset:
  name: alpaca_clean
  dataset_config:
    name: default
    path: yahma/alpaca-cleaned
    chunk_size: 1024          # sequence length for distilling
    concat_data: true
    cache_dir: 'data/alpaca'  # Change this to where you want to save
  pretrained_model_config:    # will be updated based on model_config
    pretrained_model_name_or_path: 'meta-llama/Meta-Llama-3.1-8B'
    cache_dir: '/data_persistent2/sim_data/llama-3_1-8b/'
  preprocess_config: null
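
# Dataloader arguments, assumed to be forwarded to torch.utils.data.DataLoader.
# batch_size here is per device; the effective batch size also depends on
# gradient_accumulation_steps under `trainer` below.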
dataloader:
  batch_size: 1
  num_workers: 2
  drop_last: false
  pin_memory: true
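
# Optimizer settings. adamw_torch_fused selects PyTorch's fused AdamW
# implementation (typically CUDA-only); lr 0.01 (1e-2) with weight decay disabled.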
optimizer:
  optim: adamw_torch_fused
  lr: 0.01
  weight_decay: 0.0
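
# Scheduler settings, mirroring torch.optim.lr_scheduler.ReduceLROnPlateau:
# the learning rate is multiplied by `factor` after `patience` steps without
# improvement in the monitored (min-mode) metric, floored at min_lr
# (assuming the scheduler is stepped on each evaluation loss).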
lr_scheduler:
  lr_scheduler_type: reduce_lr_on_plateau
  mode: min
  factor: 0.1
  patience: 10
  min_lr: 0.00001
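
# Distillation objective: the trainer `name` suggests a loss combining a
# cross-entropy (xent) term and an MSE term on attention outputs, weighted by
# xent_factor and mse_factor (roughly loss = 1 * xent + 1000 * mse); this
# weighting is inferred from the field names, not stated in the config itself.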
trainer:  # HuggingFace Trainer-like arguments
  name: distill_attention_xent_mse
  reverse_kl: false
  mse_factor: 1000
  xent_factor: 1
  bf16: true
  train_split: train
  val_split: validation
  num_train_epochs: 2
  gradient_accumulation_steps: 8
  seed: 42
  batch_size: 1
  load_best_model_at_end: true
  greater_is_better: false
  metric_for_best_model: distill/eval/loss
  logging_steps: 100
  evaluation_strategy: steps
  max_steps: -1
  eval_steps: 100
  max_eval_batches: null
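
# Note: max_steps -1 means no step cap (train for num_train_epochs), following
# the HuggingFace Trainer convention; max_eval_batches null presumably runs
# evaluation over the full validation split every eval_steps steps.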