xianchaowu commited on
Commit
39a4b83
·
1 Parent(s): e3763fe

lazy lora for llama2-7bhf

Browse files
Files changed (4) hide show
  1. README.md +72 -0
  2. adapter_config.json +260 -0
  3. adapter_model.bin +3 -0
  4. usage.py +51 -0
README.md CHANGED
@@ -1,3 +1,75 @@
1
  ---
2
  license: llama2
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: llama2
3
  ---
4
+
5
+ ## Lazy LoRA
6
+ Determine the rank of LoRA layers by the singular values of pretrained weight matrices.
7
+ It also combines:
8
+ 1. LoRA: [LORA: LOW-RANK ADAPTATION OF LARGE LANGUAGE MODELS](https://arxiv.org/abs/2106.09685)
9
+ 2. Prefix Tuning: [Prefix-Tuning: Optimizing Continuous Prompts for Generation](https://aclanthology.org/2021.acl-long.353/), [P-Tuning v2: Prompt Tuning Can Be Comparable to Fine-tuning Universally Across Scales and Tasks](https://arxiv.org/pdf/2110.07602.pdf)
12
+ 3. Prompt Tuning: [The Power of Scale for Parameter-Efficient Prompt Tuning](https://arxiv.org/abs/2104.08691)
13
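+ 4. LLaMA adapter: [LLaMA-Adapter: Efficient Fine-tuning of Language Models with Zero-init Attention](https://arxiv.org/abs/2303.16199)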
14
+ in one model.
15
+
16
+ This allows you to perform LoRA (additional low-rank adapters inserted into each linear layer) and prompt learning (additional virtual tokens attached to the input and to the attention layers, acting as `past_key_values`).
17
+
18
+ ## Usage:
19
+ ```python
20
+ import sys
21
+ sys.path.insert(1, '/workspace/asr/peft/src')
22
+ # TODO set this path to the lazy-lora source code path, or you can install it from source code:
23
+ # TODO, please install lazylora for usage:
24
+ # git clone git@github.com:Xianchao-Wu/peft.git
25
+ # cd peft
26
+ # python setup.py install
27
+
28
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
29
+ from peft import PeftModel, PeftConfig
30
+ import os
31
+ import torch
32
+
33
+ #import ipdb; ipdb.set_trace()
34
+ cache_dir="/workspace/asr/peft/qlora"
35
+ # TODO set this cache_dir to the path where you stored (or, want to store) llama2-7bhf model
36
+
37
+ lazylora_dir=os.getcwd() # the path that contains 'adapter_config.json' and 'adapter_model.bin'
38
+
39
+ config = PeftConfig.from_pretrained(lazylora_dir)
40
+
41
+ tokenizer = AutoTokenizer.from_pretrained(
42
+ config.base_model_name_or_path,
43
+ cache_dir=cache_dir,
44
+ use_auth_token=True
45
+ )
46
+
47
+ bnb_config = BitsAndBytesConfig(
48
+ load_in_4bit=True,
49
+ bnb_4bit_use_double_quant=True,
50
+ bnb_4bit_quant_type='nf4',
51
+ bnb_4bit_compute_dtype=torch.bfloat16
52
+ )
53
+
54
+ model = AutoModelForCausalLM.from_pretrained(
55
+ config.base_model_name_or_path,
56
+ quantization_config=bnb_config,
57
+ device_map="auto",
58
+ cache_dir=cache_dir,
59
+ use_auth_token=True
60
+ )
61
+ #model.print_trainable_parameters()
62
+ print(sum(p.numel() for p in model.parameters()))
63
+ # 3,500,412,928 -> half-size of 7B due to 4-bit loading
64
+
65
+ model = PeftModel.from_pretrained(model, lazylora_dir)
66
+ print('after adding lazy lora parameters:')
67
+ model.print_trainable_parameters()
68
+ # trainable params: 0 || all params: 3,660,359,168 || trainable%: 0.0
69
+
70
+
71
+ ```
72
+
73
+ ## MMLU result:
74
+
75
+ {'mmlu_loss': 1.8361594152170253, 'mmlu_eval_accuracy_us_foreign_policy': 0.6363636363636364, 'mmlu_eval_accuracy_world_religions': 0.7368421052631579, 'mmlu_eval_accuracy_high_school_us_history': 0.6363636363636364, 'mmlu_eval_accuracy_high_school_psychology': 0.6166666666666667, 'mmlu_eval_accuracy_public_relations': 0.3333333333333333, 'mmlu_eval_accuracy_high_school_european_history': 0.6666666666666666, 'mmlu_eval_accuracy_econometrics': 0.16666666666666666, 'mmlu_eval_accuracy_high_school_microeconomics': 0.34615384615384615, 'mmlu_eval_accuracy_machine_learning': 0.18181818181818182, 'mmlu_eval_accuracy_high_school_mathematics': 0.3448275862068966, 'mmlu_eval_accuracy_high_school_computer_science': 0.5555555555555556, 'mmlu_eval_accuracy_professional_accounting': 0.3548387096774194, 'mmlu_eval_accuracy_high_school_world_history': 0.5, 'mmlu_eval_accuracy_marketing': 0.72, 'mmlu_eval_accuracy_sociology': 0.7272727272727273, 'mmlu_eval_accuracy_nutrition': 0.5454545454545454, 'mmlu_eval_accuracy_high_school_chemistry': 0.4090909090909091, 'mmlu_eval_accuracy_logical_fallacies': 0.5555555555555556, 'mmlu_eval_accuracy_college_mathematics': 0.18181818181818182, 'mmlu_eval_accuracy_computer_security': 0.2727272727272727, 'mmlu_eval_accuracy_miscellaneous': 0.6046511627906976, 'mmlu_eval_accuracy_high_school_statistics': 0.2608695652173913, 'mmlu_eval_accuracy_philosophy': 0.4117647058823529, 'mmlu_eval_accuracy_global_facts': 0.4, 'mmlu_eval_accuracy_management': 0.2727272727272727, 'mmlu_eval_accuracy_human_aging': 0.6956521739130435, 'mmlu_eval_accuracy_moral_scenarios': 0.25, 'mmlu_eval_accuracy_human_sexuality': 0.5, 'mmlu_eval_accuracy_abstract_algebra': 0.36363636363636365, 'mmlu_eval_accuracy_high_school_macroeconomics': 0.3488372093023256, 'mmlu_eval_accuracy_electrical_engineering': 0.375, 'mmlu_eval_accuracy_professional_medicine': 0.45161290322580644, 'mmlu_eval_accuracy_high_school_government_and_politics': 0.6666666666666666, 
'mmlu_eval_accuracy_high_school_biology': 0.3125, 'mmlu_eval_accuracy_astronomy': 0.4375, 'mmlu_eval_accuracy_security_studies': 0.4074074074074074, 'mmlu_eval_accuracy_prehistory': 0.42857142857142855, 'mmlu_eval_accuracy_conceptual_physics': 0.3076923076923077, 'mmlu_eval_accuracy_college_medicine': 0.36363636363636365, 'mmlu_eval_accuracy_moral_disputes': 0.39473684210526316, 'mmlu_eval_accuracy_anatomy': 0.5, 'mmlu_eval_accuracy_clinical_knowledge': 0.41379310344827586, 'mmlu_eval_accuracy_college_computer_science': 0.5454545454545454, 'mmlu_eval_accuracy_high_school_geography': 0.5909090909090909, 'mmlu_eval_accuracy_college_chemistry': 0.125, 'mmlu_eval_accuracy_professional_psychology': 0.36231884057971014, 'mmlu_eval_accuracy_virology': 0.4444444444444444, 'mmlu_eval_accuracy_international_law': 0.8461538461538461, 'mmlu_eval_accuracy_medical_genetics': 0.8181818181818182, 'mmlu_eval_accuracy_formal_logic': 0.14285714285714285, 'mmlu_eval_accuracy_professional_law': 0.34705882352941175, 'mmlu_eval_accuracy_college_biology': 0.25, 'mmlu_eval_accuracy_jurisprudence': 0.45454545454545453, 'mmlu_eval_accuracy_business_ethics': 0.5454545454545454, 'mmlu_eval_accuracy_college_physics': 0.5454545454545454, 'mmlu_eval_accuracy_high_school_physics': 0.29411764705882354, 'mmlu_eval_accuracy_elementary_mathematics': 0.3170731707317073, 'mmlu_eval_accuracy': 0.4435841258637352, 'epoch': 1.36}
adapter_config.json ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
3
+ "bias": "none",
4
+ "fan_in_fan_out": false,
5
+ "inference_mode": true,
6
+ "init_lazy_lora_weights": true,
7
+ "is_r_by_svd": true,
8
+ "is_r_reuse": true,
9
+ "lazy_lora_alpha": 16.0,
10
+ "lazy_lora_dropout": 0.05,
11
+ "lazy_pre_adapter_type": "none",
12
+ "lazy_pre_lora_alpha": 0.1,
13
+ "modules_to_save": null,
14
+ "num_attention_heads": 32,
15
+ "num_layers": 32,
16
+ "num_transformer_submodules": 1,
17
+ "num_virtual_tokens": null,
18
+ "peft_type": "LAZY_LORA",
19
+ "prefix_tuning_config": null,
20
+ "prompt_tuning_config": null,
21
+ "r": 64,
22
+ "r_by_module_dict": {
23
+ "model.layers.0.mlp.down_proj": 58,
24
+ "model.layers.0.mlp.gate_proj": 50,
25
+ "model.layers.0.mlp.up_proj": 52,
26
+ "model.layers.0.self_attn.k_proj": 17,
27
+ "model.layers.0.self_attn.o_proj": 24,
28
+ "model.layers.0.self_attn.q_proj": 15,
29
+ "model.layers.0.self_attn.v_proj": 36,
30
+ "model.layers.1.mlp.down_proj": 62,
31
+ "model.layers.1.mlp.gate_proj": 55,
32
+ "model.layers.1.mlp.up_proj": 58,
33
+ "model.layers.1.self_attn.k_proj": 46,
34
+ "model.layers.1.self_attn.o_proj": 30,
35
+ "model.layers.1.self_attn.q_proj": 48,
36
+ "model.layers.1.self_attn.v_proj": 36,
37
+ "model.layers.10.mlp.down_proj": 62,
38
+ "model.layers.10.mlp.gate_proj": 63,
39
+ "model.layers.10.mlp.up_proj": 63,
40
+ "model.layers.10.self_attn.k_proj": 69,
41
+ "model.layers.10.self_attn.o_proj": 55,
42
+ "model.layers.10.self_attn.q_proj": 68,
43
+ "model.layers.10.self_attn.v_proj": 54,
44
+ "model.layers.11.mlp.down_proj": 63,
45
+ "model.layers.11.mlp.gate_proj": 62,
46
+ "model.layers.11.mlp.up_proj": 64,
47
+ "model.layers.11.self_attn.k_proj": 63,
48
+ "model.layers.11.self_attn.o_proj": 56,
49
+ "model.layers.11.self_attn.q_proj": 63,
50
+ "model.layers.11.self_attn.v_proj": 56,
51
+ "model.layers.12.mlp.down_proj": 63,
52
+ "model.layers.12.mlp.gate_proj": 62,
53
+ "model.layers.12.mlp.up_proj": 64,
54
+ "model.layers.12.self_attn.k_proj": 68,
55
+ "model.layers.12.self_attn.o_proj": 57,
56
+ "model.layers.12.self_attn.q_proj": 67,
57
+ "model.layers.12.self_attn.v_proj": 56,
58
+ "model.layers.13.mlp.down_proj": 64,
59
+ "model.layers.13.mlp.gate_proj": 62,
60
+ "model.layers.13.mlp.up_proj": 65,
61
+ "model.layers.13.self_attn.k_proj": 68,
62
+ "model.layers.13.self_attn.o_proj": 59,
63
+ "model.layers.13.self_attn.q_proj": 67,
64
+ "model.layers.13.self_attn.v_proj": 60,
65
+ "model.layers.14.mlp.down_proj": 64,
66
+ "model.layers.14.mlp.gate_proj": 62,
67
+ "model.layers.14.mlp.up_proj": 65,
68
+ "model.layers.14.self_attn.k_proj": 65,
69
+ "model.layers.14.self_attn.o_proj": 58,
70
+ "model.layers.14.self_attn.q_proj": 65,
71
+ "model.layers.14.self_attn.v_proj": 58,
72
+ "model.layers.15.mlp.down_proj": 65,
73
+ "model.layers.15.mlp.gate_proj": 63,
74
+ "model.layers.15.mlp.up_proj": 65,
75
+ "model.layers.15.self_attn.k_proj": 67,
76
+ "model.layers.15.self_attn.o_proj": 61,
77
+ "model.layers.15.self_attn.q_proj": 66,
78
+ "model.layers.15.self_attn.v_proj": 61,
79
+ "model.layers.16.mlp.down_proj": 65,
80
+ "model.layers.16.mlp.gate_proj": 63,
81
+ "model.layers.16.mlp.up_proj": 65,
82
+ "model.layers.16.self_attn.k_proj": 66,
83
+ "model.layers.16.self_attn.o_proj": 65,
84
+ "model.layers.16.self_attn.q_proj": 65,
85
+ "model.layers.16.self_attn.v_proj": 65,
86
+ "model.layers.17.mlp.down_proj": 65,
87
+ "model.layers.17.mlp.gate_proj": 64,
88
+ "model.layers.17.mlp.up_proj": 65,
89
+ "model.layers.17.self_attn.k_proj": 67,
90
+ "model.layers.17.self_attn.o_proj": 65,
91
+ "model.layers.17.self_attn.q_proj": 67,
92
+ "model.layers.17.self_attn.v_proj": 65,
93
+ "model.layers.18.mlp.down_proj": 65,
94
+ "model.layers.18.mlp.gate_proj": 64,
95
+ "model.layers.18.mlp.up_proj": 65,
96
+ "model.layers.18.self_attn.k_proj": 67,
97
+ "model.layers.18.self_attn.o_proj": 69,
98
+ "model.layers.18.self_attn.q_proj": 67,
99
+ "model.layers.18.self_attn.v_proj": 68,
100
+ "model.layers.19.mlp.down_proj": 65,
101
+ "model.layers.19.mlp.gate_proj": 65,
102
+ "model.layers.19.mlp.up_proj": 65,
103
+ "model.layers.19.self_attn.k_proj": 64,
104
+ "model.layers.19.self_attn.o_proj": 69,
105
+ "model.layers.19.self_attn.q_proj": 65,
106
+ "model.layers.19.self_attn.v_proj": 68,
107
+ "model.layers.2.mlp.down_proj": 63,
108
+ "model.layers.2.mlp.gate_proj": 60,
109
+ "model.layers.2.mlp.up_proj": 60,
110
+ "model.layers.2.self_attn.k_proj": 65,
111
+ "model.layers.2.self_attn.o_proj": 56,
112
+ "model.layers.2.self_attn.q_proj": 64,
113
+ "model.layers.2.self_attn.v_proj": 55,
114
+ "model.layers.20.mlp.down_proj": 66,
115
+ "model.layers.20.mlp.gate_proj": 65,
116
+ "model.layers.20.mlp.up_proj": 65,
117
+ "model.layers.20.self_attn.k_proj": 65,
118
+ "model.layers.20.self_attn.o_proj": 71,
119
+ "model.layers.20.self_attn.q_proj": 65,
120
+ "model.layers.20.self_attn.v_proj": 70,
121
+ "model.layers.21.mlp.down_proj": 66,
122
+ "model.layers.21.mlp.gate_proj": 66,
123
+ "model.layers.21.mlp.up_proj": 65,
124
+ "model.layers.21.self_attn.k_proj": 64,
125
+ "model.layers.21.self_attn.o_proj": 73,
126
+ "model.layers.21.self_attn.q_proj": 64,
127
+ "model.layers.21.self_attn.v_proj": 71,
128
+ "model.layers.22.mlp.down_proj": 66,
129
+ "model.layers.22.mlp.gate_proj": 66,
130
+ "model.layers.22.mlp.up_proj": 65,
131
+ "model.layers.22.self_attn.k_proj": 66,
132
+ "model.layers.22.self_attn.o_proj": 73,
133
+ "model.layers.22.self_attn.q_proj": 66,
134
+ "model.layers.22.self_attn.v_proj": 72,
135
+ "model.layers.23.mlp.down_proj": 66,
136
+ "model.layers.23.mlp.gate_proj": 66,
137
+ "model.layers.23.mlp.up_proj": 65,
138
+ "model.layers.23.self_attn.k_proj": 67,
139
+ "model.layers.23.self_attn.o_proj": 77,
140
+ "model.layers.23.self_attn.q_proj": 68,
141
+ "model.layers.23.self_attn.v_proj": 76,
142
+ "model.layers.24.mlp.down_proj": 66,
143
+ "model.layers.24.mlp.gate_proj": 67,
144
+ "model.layers.24.mlp.up_proj": 66,
145
+ "model.layers.24.self_attn.k_proj": 62,
146
+ "model.layers.24.self_attn.o_proj": 76,
147
+ "model.layers.24.self_attn.q_proj": 63,
148
+ "model.layers.24.self_attn.v_proj": 75,
149
+ "model.layers.25.mlp.down_proj": 66,
150
+ "model.layers.25.mlp.gate_proj": 67,
151
+ "model.layers.25.mlp.up_proj": 66,
152
+ "model.layers.25.self_attn.k_proj": 65,
153
+ "model.layers.25.self_attn.o_proj": 79,
154
+ "model.layers.25.self_attn.q_proj": 66,
155
+ "model.layers.25.self_attn.v_proj": 78,
156
+ "model.layers.26.mlp.down_proj": 66,
157
+ "model.layers.26.mlp.gate_proj": 67,
158
+ "model.layers.26.mlp.up_proj": 66,
159
+ "model.layers.26.self_attn.k_proj": 63,
160
+ "model.layers.26.self_attn.o_proj": 80,
161
+ "model.layers.26.self_attn.q_proj": 63,
162
+ "model.layers.26.self_attn.v_proj": 79,
163
+ "model.layers.27.mlp.down_proj": 66,
164
+ "model.layers.27.mlp.gate_proj": 67,
165
+ "model.layers.27.mlp.up_proj": 67,
166
+ "model.layers.27.self_attn.k_proj": 68,
167
+ "model.layers.27.self_attn.o_proj": 81,
168
+ "model.layers.27.self_attn.q_proj": 68,
169
+ "model.layers.27.self_attn.v_proj": 80,
170
+ "model.layers.28.mlp.down_proj": 67,
171
+ "model.layers.28.mlp.gate_proj": 67,
172
+ "model.layers.28.mlp.up_proj": 67,
173
+ "model.layers.28.self_attn.k_proj": 65,
174
+ "model.layers.28.self_attn.o_proj": 83,
175
+ "model.layers.28.self_attn.q_proj": 66,
176
+ "model.layers.28.self_attn.v_proj": 82,
177
+ "model.layers.29.mlp.down_proj": 68,
178
+ "model.layers.29.mlp.gate_proj": 67,
179
+ "model.layers.29.mlp.up_proj": 68,
180
+ "model.layers.29.self_attn.k_proj": 62,
181
+ "model.layers.29.self_attn.o_proj": 84,
182
+ "model.layers.29.self_attn.q_proj": 62,
183
+ "model.layers.29.self_attn.v_proj": 82,
184
+ "model.layers.3.mlp.down_proj": 62,
185
+ "model.layers.3.mlp.gate_proj": 63,
186
+ "model.layers.3.mlp.up_proj": 62,
187
+ "model.layers.3.self_attn.k_proj": 70,
188
+ "model.layers.3.self_attn.o_proj": 53,
189
+ "model.layers.3.self_attn.q_proj": 68,
190
+ "model.layers.3.self_attn.v_proj": 53,
191
+ "model.layers.30.mlp.down_proj": 67,
192
+ "model.layers.30.mlp.gate_proj": 68,
193
+ "model.layers.30.mlp.up_proj": 68,
194
+ "model.layers.30.self_attn.k_proj": 64,
195
+ "model.layers.30.self_attn.o_proj": 87,
196
+ "model.layers.30.self_attn.q_proj": 64,
197
+ "model.layers.30.self_attn.v_proj": 85,
198
+ "model.layers.31.mlp.down_proj": 67,
199
+ "model.layers.31.mlp.gate_proj": 71,
200
+ "model.layers.31.mlp.up_proj": 70,
201
+ "model.layers.31.self_attn.k_proj": 63,
202
+ "model.layers.31.self_attn.o_proj": 78,
203
+ "model.layers.31.self_attn.q_proj": 61,
204
+ "model.layers.31.self_attn.v_proj": 77,
205
+ "model.layers.4.mlp.down_proj": 61,
206
+ "model.layers.4.mlp.gate_proj": 64,
207
+ "model.layers.4.mlp.up_proj": 62,
208
+ "model.layers.4.self_attn.k_proj": 71,
209
+ "model.layers.4.self_attn.o_proj": 56,
210
+ "model.layers.4.self_attn.q_proj": 70,
211
+ "model.layers.4.self_attn.v_proj": 56,
212
+ "model.layers.5.mlp.down_proj": 61,
213
+ "model.layers.5.mlp.gate_proj": 64,
214
+ "model.layers.5.mlp.up_proj": 62,
215
+ "model.layers.5.self_attn.k_proj": 73,
216
+ "model.layers.5.self_attn.o_proj": 57,
217
+ "model.layers.5.self_attn.q_proj": 72,
218
+ "model.layers.5.self_attn.v_proj": 58,
219
+ "model.layers.6.mlp.down_proj": 61,
220
+ "model.layers.6.mlp.gate_proj": 65,
221
+ "model.layers.6.mlp.up_proj": 62,
222
+ "model.layers.6.self_attn.k_proj": 67,
223
+ "model.layers.6.self_attn.o_proj": 53,
224
+ "model.layers.6.self_attn.q_proj": 67,
225
+ "model.layers.6.self_attn.v_proj": 53,
226
+ "model.layers.7.mlp.down_proj": 61,
227
+ "model.layers.7.mlp.gate_proj": 65,
228
+ "model.layers.7.mlp.up_proj": 62,
229
+ "model.layers.7.self_attn.k_proj": 66,
230
+ "model.layers.7.self_attn.o_proj": 53,
231
+ "model.layers.7.self_attn.q_proj": 67,
232
+ "model.layers.7.self_attn.v_proj": 53,
233
+ "model.layers.8.mlp.down_proj": 61,
234
+ "model.layers.8.mlp.gate_proj": 64,
235
+ "model.layers.8.mlp.up_proj": 62,
236
+ "model.layers.8.self_attn.k_proj": 68,
237
+ "model.layers.8.self_attn.o_proj": 55,
238
+ "model.layers.8.self_attn.q_proj": 69,
239
+ "model.layers.8.self_attn.v_proj": 54,
240
+ "model.layers.9.mlp.down_proj": 62,
241
+ "model.layers.9.mlp.gate_proj": 63,
242
+ "model.layers.9.mlp.up_proj": 63,
243
+ "model.layers.9.self_attn.k_proj": 70,
244
+ "model.layers.9.self_attn.o_proj": 56,
245
+ "model.layers.9.self_attn.q_proj": 70,
246
+ "model.layers.9.self_attn.v_proj": 55
247
+ },
248
+ "rank_file": "",
249
+ "target_modules": [
250
+ "down_proj",
251
+ "v_proj",
252
+ "q_proj",
253
+ "o_proj",
254
+ "up_proj",
255
+ "gate_proj",
256
+ "k_proj"
257
+ ],
258
+ "task_type": "CAUSAL_LM",
259
+ "token_dim": 4096
260
+ }
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3aa846c06af3188a3d7b7c3e0a32ad4c1ecb48b1d02353610425c192c3ae4182
3
+ size 320063949
usage.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ sys.path.insert(1, '/workspace/asr/peft/src')
3
+ # TODO set this path to the lazy-lora source code path, or you can install it from source code:
4
+ # TODO, please install lazylora for usage:
5
+ # git clone git@github.com:Xianchao-Wu/peft.git
6
+ # cd peft
7
+ # python setup.py install
8
+
9
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
10
+ from peft import PeftModel, PeftConfig
11
+ import os
12
+ import torch
13
+
14
+ #import ipdb; ipdb.set_trace()
15
+ cache_dir="/workspace/asr/peft/qlora"
16
+ # TODO set this cache_dir to the path where you stored (or, want to store) llama2-7bhf model
17
+
18
+ lazylora_dir=os.getcwd() # the path that contains 'adapter_config.json' and 'adapter_model.bin'
19
+
20
+ config = PeftConfig.from_pretrained(lazylora_dir)
21
+
22
+ tokenizer = AutoTokenizer.from_pretrained(
23
+ config.base_model_name_or_path,
24
+ cache_dir=cache_dir,
25
+ use_auth_token=True
26
+ )
27
+
28
+ bnb_config = BitsAndBytesConfig(
29
+ load_in_4bit=True,
30
+ bnb_4bit_use_double_quant=True,
31
+ bnb_4bit_quant_type='nf4',
32
+ bnb_4bit_compute_dtype=torch.bfloat16
33
+ )
34
+
35
+ model = AutoModelForCausalLM.from_pretrained(
36
+ config.base_model_name_or_path,
37
+ quantization_config=bnb_config,
38
+ device_map="auto",
39
+ cache_dir=cache_dir,
40
+ use_auth_token=True
41
+ )
42
+ #model.print_trainable_parameters()
43
+ print(sum(p.numel() for p in model.parameters()))
44
+ # 3,500,412,928 -> half-size of 7B due to 4-bit loading
45
+
46
+ model = PeftModel.from_pretrained(model, lazylora_dir)
47
+ print('after adding lazy lora parameters:')
48
+ model.print_trainable_parameters()
49
+ # trainable params: 0 || all params: 3,660,359,168 || trainable%: 0.0
50
+
51
+