ChuckMcSneed committed on
Commit
e019a6a
1 Parent(s): 804dbf3

Upload LoRD-for-windows.py

Files changed (1)
  1. LoRD-for-windows.py +391 -0
LoRD-for-windows.py ADDED
@@ -0,0 +1,391 @@
+ # My dirty fix for offline LoRA conversion on CPU on Windows
+ ### Pick locations here:
+
+ base_model_id = 'C:/path/to/base-model' #eg C:/models/llama2-hf
+ target_model_id = 'C:/path/to/target-model' #eg C:/models/euryale
+ cache_dir = './models'
+ LORA_OUT_DIR = "./lora"
+
+ import os
+ from huggingface_hub import list_repo_files, snapshot_download
+
+ def init_transformers_model(local_path, cache_dir):
+     from os import listdir
+     from os.path import isfile, join
+     onlyfiles = [f for f in listdir(local_path) if isfile(join(local_path, f))]
+     has_safetensors = any(file.endswith('.safetensors') for file in onlyfiles)
+
+     print(f"Model will be loaded from: {local_path}")
+     if has_safetensors:
+         print("Note: .safetensors found. Make sure there are no leftover .bin files in the same directory.")
+     return os.path.abspath(local_path), has_safetensors
+
+
+ # ### Downloading the base model
+
+ # In[4]:
+
+ base_model_download_path, base_model_has_safetensors = init_transformers_model(base_model_id, cache_dir)
+
+ models = {
+     'base' : {
+         'download_path' : base_model_download_path,
+         'has_safetensors' : base_model_has_safetensors
+     },
+     'target' : None
+ }
+
+
+ # ### Identifying relevant model layers
+ #
+ # Define functions to identify linear and embedding layers within transformer models. These layers are the targets for LoRA adapter extraction.
+
+ # In[5]:
+
+
+ # This code has been modified from its original version on the Axolotl project.
+ # Copyright 2023 Axolotl contributors.
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ # http://www.apache.org/licenses/LICENSE-2.0
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+
+ import os
+ import torch
+ import bitsandbytes as bnb
+ from transformers import AutoModelForCausalLM, AutoConfig, AutoTokenizer
+ from peft.tuners.lora import QuantLinear
+
+
+ def get_linear_embedding_layers(model_type):
+     """
+     Returns the linear embedding layers needed for LoRAs, dependent on the model arch.
+     """
+     if model_type == "gpt_neox":
+         return ["embed_in", "embed_out"]
+     if model_type == "falcon":
+         return ["word_embeddings", "lm_head"]
+     return ["embed_tokens", "lm_head"]
+
+
+ def find_all_linear_names(model):
+     cls = (bnb.nn.Linear4bit, bnb.nn.Linear8bitLt, torch.nn.Linear, QuantLinear)
+
+     names = []
+     for name, module in model.named_modules():
+         if (
+             isinstance(module, cls)
+             or "Linear" in module.__class__.__name__
+             and module.__class__.__name__ not in ("LlamaLinearScalingRotaryEmbedding",)
+         ):
+             names.append(name)
+
+     return names
+
+ def get_linear_module_names(model_id):
+     model = AutoModelForCausalLM.from_pretrained(model_id, state_dict={}, device_map="meta") #avoid loading weights as we won't need them
+     return find_all_linear_names(model)
+
+ linear_module_names = get_linear_module_names(models['base']['download_path'])
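+
+ # Quick sanity check (not in the original script): print a small sample of what was
+ # found. For a LLaMA-style base model this list usually contains names ending in
+ # q_proj, k_proj, v_proj, o_proj, gate_proj, up_proj, down_proj and lm_head, but the
+ # exact set depends on the architecture, so treat those names as an assumption.
+ print(f"Found {len(linear_module_names)} linear modules, e.g. {linear_module_names[:3]}")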
+
+
+ # ### Downloading the target model
+
+ # In[6]:
+
+
+ target_model_download_path, target_model_has_safetensors = init_transformers_model(target_model_id, cache_dir)
+
+ models['target'] = {
+     'download_path' : target_model_download_path,
+     'has_safetensors' : target_model_has_safetensors
+ }
+
+
+ # ### Loading tensors from .bin files
+ #
+ # Define functions to load PyTorch tensors from `.bin` or `.safetensors` files.
+
+ # In[7]:
+
+
+ import torch
+ import glob
+
+ def load_pytorch_tensors(directory, device='cpu'):
+     """
+     Loads tensors from .bin files in the specified directory into a dictionary.
+
+     Args:
+     - directory (str): Path to the directory containing .bin files.
+     - device (str): The device to load the tensors on ('cpu', 'cuda', etc.). Default is 'cpu'.
+
+     Returns:
+     - dict: A dictionary containing all tensors from the .bin files.
+     """
+     tensors_dict = {}
+     # Use glob to find all .bin files in the directory
+     file_paths = glob.glob(f"{directory}/*.bin")
+
+     # Loop through each file and load its tensors into the dictionary
+     for file_path in sorted(file_paths):
+         loaded_tensors = torch.load(file_path, map_location=torch.device(device))
+         for k, v in loaded_tensors.items():
+             tensors_dict[k] = v
+
+     return tensors_dict
+
+
+ # In[8]:
+
+
+ import glob
+ from safetensors import safe_open
+
+ def load_safetensors(directory, framework="pt", device='cpu'):
+     """
+     Loads tensors from .safetensors files in the specified directory into a dictionary.
+
+     Args:
+     - directory (str): Path to the directory containing .safetensors files.
+     - framework (str): The framework to use ('pt' for PyTorch, 'tf' for TensorFlow, etc.). Default is 'pt'.
+     - device (str): The device to load the tensors on ('cpu', 'cuda', etc.). Default is 'cpu'.
+
+     Returns:
+     - dict: A dictionary containing all tensors from the .safetensors files.
+     """
+     tensors_dict = {}
+     # Use glob to find all .safetensors files in the directory
+     file_paths = glob.glob(f"{directory}/*.safetensors")
+
+     # Loop through each file and load its tensors into the dictionary
+     for file_path in sorted(file_paths):
+         with safe_open(file_path, framework=framework, device=device) as f:
+             for k in f.keys():
+                 tensors_dict[k] = f.get_tensor(k)
+
+     return tensors_dict
+
+
+ # ### Loading model weights
+ #
+ # Load weights for both base and target models.
+
+ # In[9]:
+
+
+ base_model_weights = load_safetensors(models['base']['download_path']) if models['base']['has_safetensors'] else load_pytorch_tensors(models['base']['download_path'])
+ print("Base model weights loaded.")
+ target_model_weights = load_safetensors(models['target']['download_path']) if models['target']['has_safetensors'] else load_pytorch_tensors(models['target']['download_path'])
+ print("Target model weights loaded.")
+
+ # ### Weight matrix decomposition
+ #
+ # The crux of what we're doing here: we define a function to decompose weight matrices into low-rank matrices using SVD.
+
+ # In[10]:
+
+
+ import torch
+
+ def _low_rank_decomposition(weight, reduced_rank=16):
+     """
+     Decompose a 2D matrix into low-rank matrices A and B using SVD.
+
+     :param weight: The matrix to decompose, of shape (H, W)
+     :param reduced_rank: The final rank of the decomposition
+     :return: A tuple of tensors (A, B)
+     """
+     if weight.dim() != 2:
+         raise ValueError(f"Only support 2D matrix, but your input has {weight.dim()} dimensions.")
+
+     weight = weight.to(torch.float32)
+     # SVD Decomposition
+     U, S, Vh = torch.linalg.svd(weight, full_matrices=False)
+
+     # Truncated matrices
+     A = Vh[:reduced_rank, :]
+     B = U[:, :reduced_rank] @ torch.diag(S[:reduced_rank])
+
+     return A, B
+
+ def decompose_delta_weight(new_weight, base_weight, alpha, reduced_rank, device=None):
+     """
+     Decompose the delta weight into low-rank matrices A and B, considering the alpha scaling factor.
+
+     :param new_weight: The updated weight matrix after applying LoRA.
+     :param base_weight: The original weight matrix before LoRA.
+     :param alpha: The alpha scaling factor used in LoRA.
+     :param reduced_rank: The rank for the low-rank decomposition.
+     :return: A tuple of tensors (A, B)
+     """
+     if device is None:
+         device = 'cpu'
+
+     new_weight = new_weight.to(device)
+     base_weight = base_weight.to(device)
+
+     delta_weight = new_weight - base_weight
+
+     # Check if alpha is applied uniformly
+     # Adjust the implementation if alpha is applied differently
+     adjusted_delta_weight = delta_weight / alpha
+
+     A, B = _low_rank_decomposition(adjusted_delta_weight, reduced_rank=reduced_rank)
+
+     return A, B
+
+
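+ # Optional sanity check (not in the original script): for a synthetic delta that is
+ # exactly rank 16, B @ A should reconstruct (delta / alpha) almost perfectly, since
+ # A comes from Vh and B from U @ diag(S). Uses only small random CPU tensors.
+ _Wb = torch.randn(64, 64)
+ _Wd = torch.randn(64, 16) @ torch.randn(16, 64)  # a rank-16 update
+ _A, _B = decompose_delta_weight(_Wb + _Wd, _Wb, alpha=1, reduced_rank=16)
+ _err = (_B @ _A - _Wd).abs().max().item()
+ print(f"Rank-16 reconstruction max abs error (should be tiny): {_err:.2e}")
+ del _Wb, _Wd, _A, _B
+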
+ # ## Extract the LoRAs
+
+ # In[11]:
+
+
+ from tqdm import tqdm  # plain tqdm: this runs as a script, and the notebook variant needs ipywidgets
+
+ loras = {}
+
+ # lower rank captures less of the original model; a rank of 32 is probably reasonable for small deltas (task-specific finetunes and such)
+ alpha = 1
+ rank = 32
+ print("Decomposing LoRA... (this may take a few hours for larger models)")
+ for module in tqdm(linear_module_names):
+     target_tensor = target_model_weights[module+".weight"]
+     base_tensor = base_model_weights[module+".weight"]
+
+     lora_A, lora_B = decompose_delta_weight(target_tensor, base_tensor, alpha, rank)
+     loras[f"base_model.model.{module}.lora_A.weight"] = lora_A.to('cpu')
+     loras[f"base_model.model.{module}.lora_B.weight"] = lora_B.to('cpu')
+
+ del target_model_weights
+ del base_model_weights
+ print("LoRA decomposed.")
+ # ### Extracting correct module names for PEFT
+ #
+ # PEFT config uses partial module names, let's extract them correctly
+
+ # In[12]:
+
+
+ def get_module_peft_name(module_name):
+     return module_name.split('.')[-1]
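+
+ # For example (illustrative, assuming LLaMA-style names):
+ # get_module_peft_name('model.layers.0.self_attn.q_proj') -> 'q_proj'
+ # so the de-duplicated target_modules list below ends up with short names like
+ # ['q_proj', 'k_proj', 'v_proj', ...] rather than full per-layer paths.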
+
+
+ # ### Configuring LoRA model with PEFT
+ #
+ # Set up a PEFT LoRA configuration for the model. Load the base model and apply this configuration, saving the configuration to disk. The LoRA weights will be saved later from our SVD decomposition.
+
+ # In[13]:
+
+
+ from transformers import AutoModelForCausalLM, AutoConfig, AutoTokenizer
+ from peft import get_peft_model, LoraConfig
+
+
+ lora_config = LoraConfig(
+     lora_alpha=32, # Setting the alpha to the decomposition rank value (instead of the alpha value used) seems to give better performance. Further testing would be needed to understand what the optimal alpha value is.
+     lora_dropout=0,
+     r=32,
+     bias="none",
+     task_type="CAUSAL_LM",
+     target_modules=list(set([get_module_peft_name(e) for e in linear_module_names])),
+ )
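+
+ # Note (added for clarity, not in the original script): at inference PEFT scales the
+ # adapter by lora_alpha / r, i.e. W' = W + (lora_alpha / r) * B @ A. With lora_alpha
+ # equal to r (32 here) that factor is 1, which matches the alpha = 1 used during the
+ # decomposition above.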
+
+ print("Saving LoRA to disk...")
+ model = AutoModelForCausalLM.from_pretrained(models['base']['download_path'], load_in_4bit=False)
+
+ peft_model = get_peft_model(model, lora_config)
+
+ # Save to disk
+ peft_model.save_pretrained(LORA_OUT_DIR)
+
+ del peft_model
+ del model
+ print("LoRA saved to disk.")
+
+ # ### Saving LoRA adapters as SafeTensors
+ #
+ # Save the decomposed LoRA weights alongside our PEFT adapter config
+
+ # In[14]:
+
+
+ import torch
+ from safetensors.torch import save_file
+
+ print("Saving LoRA adapters as SafeTensors...")
+ for key in loras.keys():
+     loras[key] = loras[key].to('cpu').contiguous()
+
+ save_file(loras, os.path.join(LORA_OUT_DIR, 'adapter_model.safetensors'))
+
+ print("Saved LoRA adapters as SafeTensors.")
+ # First, let's replace the `base_model_name_or_path` value of the adapter config with the base model id instead of the local path
+
+ # In[22]:
+
+
+ import os
+ import json
+
+ print("Adding metadata...")
+ adapter_config_path = os.path.join(LORA_OUT_DIR, 'adapter_config.json')
+
+ # Load the configuration from the file
+ with open(adapter_config_path, 'r') as file:
+     config = json.load(file)
+
+ # Update the base_model_name_or_path in the configuration
+ config['base_model_name_or_path'] = base_model_id
+
+ # Save the updated configuration back to the file
+ with open(adapter_config_path, 'w') as file:
+     json.dump(config, file, indent=2)
+
+ print("Configuration updated successfully.")
+
+
+ # Now let's create a readme
+
+ # In[23]:
+
+
+ import yaml
+
+ # Define your metadata as a Python dictionary
+ metadata = {
+     'library_name': 'peft',
+     'base_model': base_model_id
+ }
+
+ # Convert the dictionary to YAML format
+ yaml_frontmatter = yaml.dump(metadata, sort_keys=False)
+
+ # Define your Markdown content
+ markdown_content = f"""
+ # Low-rank decomposition of [{target_model_id}](https://huggingface.co/{target_model_id}) using [{base_model_id}](https://huggingface.co/{base_model_id}) as base
+
+ Created using [LoRD](https://github.com/thomasgauthier/LoRD)
+ """
+
+ # Combine the YAML frontmatter and Markdown content
+ full_content = f"---\n{yaml_frontmatter}---\n{markdown_content}"
+
+ adapter_readme_path = os.path.join(LORA_OUT_DIR, 'README.md')
+
+ # Write to a Markdown file
+ with open(adapter_readme_path, 'w') as md_file:
+     md_file.write(full_content)
+
+ print("Markdown file successfully created.")