# Qwen1.5-0.5B-OpenIT / axolotl_config.yml
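# Axolotl config: LoRA fine-tune of Qwen1.5-0.5B on three instruction-tuning
# datasets, all rendered as ChatML prompts. Typical launch, assuming the
# axolotl CLI and accelerate are installed:
#   accelerate launch -m axolotl.cli.train axolotl_config.yml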
base_model: Qwen/Qwen1.5-0.5B
model_type: Qwen2ForCausalLM
tokenizer_type: AutoTokenizer
trust_remote_code: true
save_safetensors: true
load_in_8bit: false
load_in_4bit: false
strict: false
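# Three training sources, each mapped to ChatML-style prompts: Open-Platypus via
# the alpaca prompter, OpenHermes via sharegpt conversations, and Dolly via the
# custom template defined inline below.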
datasets:
  - path: garage-bAInd/Open-Platypus
    type: alpaca
    prompt_style: chatml
  - path: teknium/OpenHermes-2.5
    type: sharegpt
    conversation: qwen-7b-chat
  - path: databricks/databricks-dolly-15k
    type:
      field_system: ""
      field_instruction: instruction
      field_input: context
      field_output: response
      format: |-
        <|im_start|>system
        You are a helpful assistant. Please give a concise and accurate answer<|im_end|>
        <|im_start|>user
        {instruction} {input}<|im_end|>
        <|im_start|>assistant
      no_input_format: |-
        <|im_start|>system
        You are a helpful assistant. Please give a concise and accurate answer<|im_end|>
        <|im_start|>user
        {instruction}<|im_end|>
        <|im_start|>assistant
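# {instruction} and {input} above are axolotl template placeholders, filled from
# the field_instruction / field_input column mappings.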
shuffle_merged_datasets: true
val_set_size: 0.04
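# 4% of the merged data is held out; note that evals_per_epoch is 0 below, so
# the split is not scored during training.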
chat_template: chatml
default_system_message: "You are a helpful assistant. Please give a concise and accurate answer"
output_dir: ./qwen_out
sequence_len: 2048
sample_packing: true
eval_sample_packing: false
pad_to_sequence_len: true
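# Packing concatenates multiple short examples into each 2048-token training
# sequence; eval batches are left unpacked.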
adapter: lora
lora_r: 8
lora_alpha: 16
lora_dropout: 0.05
lora_target_modules:
  - q_proj
  - v_proj
lora_target_linear: true
lora_modules_to_save:
  - embed_tokens
  - lm_head
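# With lora_target_linear enabled, axolotl adapts all linear layers, not only
# the q_proj/v_proj list; embed_tokens and lm_head are trained in full and
# saved alongside the adapter.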
wandb_project: qwen-0.5b-lora
wandb_name: qwen-lora
wandb_log_model: checkpoint
gradient_accumulation_steps: 16
micro_batch_size: 1
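# Effective batch size per GPU = micro_batch_size x gradient_accumulation_steps
# = 1 x 16 = 16.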
num_epochs: 4
optimizer: adamw_torch_fused
lr_scheduler: cosine
learning_rate: 0.0002
max_grad_norm: 1.0
train_on_inputs: false
group_by_length: false
bf16: true
gradient_checkpointing: false
logging_steps: 1
flash_attention: false
deepspeed: deepspeed_configs/zero1.json
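# DeepSpeed ZeRO stage 1 shards optimizer state across GPUs; the relative path
# assumes axolotl's bundled deepspeed_configs/ directory is present.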
warmup_steps: 4
evals_per_epoch: 0
saves_per_epoch: 1
weight_decay: 0.01