PocketDoc committed on
Commit
48e0190
·
1 Parent(s): e2d90b8

Upload 2-PKTDC-llama-30B-gptq-lora-24gb.yml

Browse files
trainer config/2-PKTDC-llama-30B-gptq-lora-24gb.yml ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # accelerate launch ./scripts/finetune.py 2-PKTDC-llama-30B-gptq-lora-24gb.yml
2
+ #
3
+ # base model settings (local or huggingface repo)
4
+ base_model: PocketDoc/llama-30b-gptq-4bit-128g
5
+ base_model_config: PocketDoc/llama-30b-gptq-4bit-128g
6
+ model_type: LlamaForCausalLM
7
+ tokenizer_type: LlamaTokenizer
8
+ trust_remote_code:
9
+
10
+ # wandb configuration
11
+ wandb_project: llama-30b-gptq-4bit-128g-lora
12
+ wandb_watch:
13
+ wandb_run_id:
14
+ wandb_log_model:
15
+
16
+ # where to save the finished model to
17
+ output_dir: ./llama-30b-gptq-4bit-128g-lora
18
+
19
+ # dataset settings (local or huggingface repo)
20
+ datasets:
21
+ - path: dansmeth.json
22
+ type: pygmalion
23
+
24
+ dataset_prepared_path: data/last_run_prepared
25
+
26
+ # fraction of the dataset to set aside for evaluation (e.g. 0.02 = 2%)
27
+ val_set_size: 0.02
28
+
29
+ # maximum token length per prompt
30
+ sequence_len: 2048
31
+
32
+ # max sequence length to concatenate training samples together up to
33
+ # inspired by StackLLaMA. see https://huggingface.co/blog/stackllama#supervised-fine-tuning
34
+ max_packed_sequence_len: 2048
35
+
36
+ # quantized model loading settings
37
+ gptq: true
38
+ gptq_groupsize: 128 # group size
39
+ gptq_model_v1: false # v1 or v2
40
+ strict: false
41
+
42
+ # this will attempt to quantize the model down to 8 bits and use the 8-bit Adam optimizer
43
+ load_in_8bit: true
44
+
45
+ load_in_4bit:
46
+
47
+ # Use CUDA bf16
48
+ bf16: false
49
+ # Use CUDA fp16
50
+ fp16: true
51
+ # Use CUDA tf32
52
+ tf32: true
53
+
54
+ # training hyperparameters
55
+ gradient_accumulation_steps: 32
56
+ micro_batch_size: 1
57
+ eval_batch_size: 1
58
+ num_epochs: 3
59
+ warmup_steps: 350
60
+ learning_rate: 0.00003
61
+
62
+ logging_steps: 1
63
+ eval_steps: 25
64
+ save_steps: 175
65
+
66
+ # stop training after this many evaluation losses have increased in a row
67
+ # https://huggingface.co/transformers/v4.2.2/_modules/transformers/trainer_callback.html#EarlyStoppingCallback
68
+ early_stopping_patience:
69
+ # specify a scheduler to use with the optimizer. NOTE(review): an earlier note said only one_cycle is supported, but linear is set here - confirm which values the trainer accepts
70
+ lr_scheduler: linear
71
+ # specify optimizer
72
+ optimizer: paged_adamw_8bit
73
+ # specify weight decay
74
+ weight_decay: 0.05
75
+
76
+
77
+ # if you already have a lora model trained that you want to load, put that here
78
+ lora_model_dir:
79
+
80
+ # LoRA hyperparameters
81
+ adapter: lora # blank for full finetune
82
+ lora_r: 32
83
+ lora_alpha: 64
84
+ lora_dropout: 0.05
85
+ lora_target_linear:
86
+ lora_target_modules:
87
+ - q_proj
88
+ - v_proj
89
+ # - k_proj
90
+ # - o_proj
91
+ # - gate_proj
92
+ # - down_proj
93
+ # - up_proj
94
+ lora_modules_to_save:
95
+ # - embed_tokens
96
+ # - lm_head
97
+ lora_out_dir:
98
+ lora_fan_in_fan_out: false
99
+
100
+
101
+ # whether to mask out or include the human's prompt from the training labels
102
+ train_on_inputs: false
103
+ # reportedly leads to wonky training (according to someone on the internet), so use with care. NOTE(review): it is enabled in this config - confirm this is intended
104
+ group_by_length: true
105
+
106
+
107
+ # NOTE(review): reportedly does not work with the current implementation of 4-bit LoRA, yet it is enabled in this config - confirm
108
+ gradient_checkpointing: true
109
+
110
+
111
+ # whether to use xformers attention patch https://github.com/facebookresearch/xformers:
112
+ xformers_attention: true
113
+ # whether to use flash attention patch https://github.com/HazyResearch/flash-attention:
114
+ flash_attention: # requires an A100 for llama
115
+ # whether to use scaled-dot-product attention
116
+ # https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html
117
+ sdp_attention:
118
+
119
+
120
+ # resume from a specific checkpoint dir
121
+ resume_from_checkpoint:
122
+ # if resume_from_checkpoint isn't set and you simply want it to start where it left off
123
+ # be careful with this being turned on between different models
124
+ auto_resume_from_checkpoints:
125
+
126
+
127
+ # don't mess with this, it's here for accelerate and torchrun
128
+ local_rank:
129
+
130
+ # add or change special tokens
131
+ special_tokens:
132
+ # sys_role_token: "<|system|>"
133
+ # user_role_token: "<|user|>"
134
+ # model_role_token: "<|model|>"
135
+ bos_token: "<s>"
136
+ eos_token: "</s>"
137
+ unk_token: "<unk>"
138
+
139
+ # add extra tokens
140
+ tokens:
141
+
142
+
143
+ # FSDP
144
+ fsdp:
145
+
146
+ fsdp_config:
147
+
148
+ # Deepspeed
149
+ deepspeed:
150
+
151
+ # TODO
152
+ torchdistx_path:
153
+
154
+ # Debug mode
155
+ debug: