llama3-central-pretrained-model-1 / trainer_config.yaml

Upload 18 files

bdd5ef6 verified 5 months ago

681 Bytes

	# Training-run configuration: a flat mapping of option name -> scalar value,
	# with keys kept in alphabetical order.
	# NOTE(review): the key names look like LLaMA-Factory / Hugging Face Trainer
	# arguments, but the consuming tool is not named here — confirm before relying
	# on any option's exact meaning.
	# NOTE(review): every line in this block begins with a tab character. YAML does
	# not allow tabs as indentation; if the tabs exist in the real file (and are not
	# just an artefact of how it was copied), strip them before parsing.
	cutoff_len: 1024
	dataset: Central-SheungWan
	# Relative path — presumably resolved against the launch directory; verify.
	dataset_dir: data
	ddp_timeout: 180000000
	do_train: true
	# finetuning_type, freeze_trainable_layers and freeze_trainable_modules (below)
	# are set together; presumably only 2 layers, with all of their modules, are
	# left trainable — TODO confirm against the tool's documentation.
	finetuning_type: freeze
	flash_attn: auto
	# Half-precision flag. NOTE(review): no bf16 key is present in this file.
	fp16: true
	freeze_trainable_layers: 2
	freeze_trainable_modules: all
	# Combined with per_device_train_batch_size: 1 (below) this presumably yields
	# 1 x 8 = 8 samples per optimizer step on each device — confirm.
	gradient_accumulation_steps: 8
	learning_rate: 5.0e-05
	logging_steps: 5
	# Cosine schedule with warmup_steps: 0 (last key), i.e. no warmup is configured.
	lr_scheduler_type: cosine
	max_grad_norm: 1.0
	max_samples: 10000
	# Base model identifier in "owner/name" form.
	model_name_or_path: shenzhi-wang/Llama3-8B-Chinese-Chat
	num_train_epochs: 3.0
	optim: adamw_torch
	# Relative output path; its last component embeds a timestamp (2024-05-30-09-37-42)
	# and its middle components repeat the model name and the finetuning_type value.
	output_dir: saves/LLaMA3-8B-Chinese-Chat/freeze/train_2024-05-30-09-37-42
	packing: true
	per_device_train_batch_size: 1
	plot_loss: true
	preprocessing_num_workers: 16
	# Literal value "none"; presumably disables external experiment trackers — verify.
	report_to: none
	save_steps: 100
	# NOTE(review): "pt" presumably selects a pre-training stage — confirm.
	stage: pt
	template: llama3
	warmup_steps: 0