lazarohurtado committed
Commit
659759f
1 Parent(s): 3c7fecb

Upload axolotl_config.yml

Files changed (1)
  1. axolotl_config.yml +83 -0
axolotl_config.yml ADDED
@@ -0,0 +1,83 @@
+ base_model: Qwen/Qwen1.5-0.5B
+ model_type: Qwen2ForCausalLM
+ tokenizer_type: AutoTokenizer
+ trust_remote_code: true
+ save_safetensors: true
+
+ load_in_8bit: false
+ load_in_4bit: false
+ strict: false
+
+ datasets:
+   - path: garage-bAInd/Open-Platypus
+     type: alpaca
+     prompt_style: chatml
+   - path: teknium/OpenHermes-2.5
+     type: sharegpt
+     conversation: qwen-7b-chat
+   - path: databricks/databricks-dolly-15k
+     type:
+       field_system: ""
+       field_instruction: instruction
+       field_input: context
+       field_output: response
+       format: |-
+         <|im_start|>system
+         You are a helpful assistant. Please give a concise and accurate answer<|im_end|>
+         <|im_start|>user
+         {instruction} {input}<|im_end|>
+         <|im_start|>assistant
+       no_input_format: |-
+         <|im_start|>system
+         You are a helpful assistant. Please give a concise and accurate answer<|im_end|>
+         <|im_start|>user
+         {instruction}<|im_end|>
+         <|im_start|>assistant
+ shuffle_merged_datasets: true
+ val_set_size: 0.04
+ chat_template: chatml
+ default_system_message: "You are a helpful assistant. Please give a concise and accurate answer"
+ output_dir: ./qwen_out
+
+ sequence_len: 2048
+ sample_packing: true
+ eval_sample_packing: false
+ pad_to_sequence_len: true
+
+ adapter: lora
+ lora_r: 8
+ lora_alpha: 16
+ lora_dropout: 0.05
+ lora_target_modules:
+   - q_proj
+   - v_proj
+ lora_target_linear: true
+ lora_modules_to_save:
+   - embed_tokens
+   - lm_head
+
+ wandb_project: qwen-0.5b-lora
+ wandb_name: qwen-lora
+ wandb_log_model: checkpoint
+
+ gradient_accumulation_steps: 16
+ micro_batch_size: 1
+ num_epochs: 4
+ optimizer: adamw_torch_fused
+ lr_scheduler: cosine
+ learning_rate: 0.0002
+ max_grad_norm: 1.0
+
+ train_on_inputs: false
+ group_by_length: false
+ bf16: true
+
+ gradient_checkpointing: false
+ logging_steps: 1
+ flash_attention: false
+ deepspeed: deepspeed_configs/zero1.json
+
+ warmup_steps: 4
+ evals_per_epoch: 0
+ saves_per_epoch: 1
+ weight_decay: 0.01
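
For reference, a config like this is passed straight to the axolotl trainer CLI. Assuming axolotl and accelerate are installed, the file is saved as axolotl_config.yml, and the deepspeed_configs/zero1.json referenced above is present, a launch would typically look something like:

accelerate launch -m axolotl.cli.train axolotl_config.yml

Because this run trains a LoRA adapter (with embed_tokens and lm_head also saved via lora_modules_to_save), the adapter written to ./qwen_out would normally be merged back into the base weights afterwards with axolotl's merge_lora entry point before standalone use.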