Spaces:
Runtime error
Runtime error
Arrcttacsrks
committed on
Commit
•
16c390a
1
Parent(s):
1f888e3
Upload llama.cpp/convert_lora_to_gguf.py with huggingface_hub
Browse files
llama.cpp/convert_lora_to_gguf.py
ADDED
@@ -0,0 +1,433 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
|
4 |
+
from __future__ import annotations
|
5 |
+
|
6 |
+
from dataclasses import dataclass
|
7 |
+
import logging
|
8 |
+
import argparse
|
9 |
+
import os
|
10 |
+
import sys
|
11 |
+
import json
|
12 |
+
from math import prod
|
13 |
+
from pathlib import Path
|
14 |
+
from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, Sequence, SupportsIndex, cast
|
15 |
+
from transformers import AutoConfig
|
16 |
+
|
17 |
+
import torch
|
18 |
+
|
19 |
+
if TYPE_CHECKING:
|
20 |
+
from torch import Tensor
|
21 |
+
|
22 |
+
if 'NO_LOCAL_GGUF' not in os.environ:
|
23 |
+
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
|
24 |
+
import gguf
|
25 |
+
|
26 |
+
# reuse model definitions from convert_hf_to_gguf.py
|
27 |
+
from convert_hf_to_gguf import LazyTorchTensor, Model
|
28 |
+
|
29 |
+
logger = logging.getLogger("lora-to-gguf")
|
30 |
+
|
31 |
+
|
32 |
+
@dataclass
class PartialLoraTensor:
    """Holder for the two factor tensors of one LoRA weight.

    Both fields default to ``None`` so that an instance can be created as soon
    as either factor is known and completed once the other one is assigned.
    """

    # the "lora_A" factor, or None while it has not been assigned
    A: Tensor | None = None
    # the "lora_B" factor, or None while it has not been assigned
    B: Tensor | None = None
|
36 |
+
|
37 |
+
|
38 |
+
# magic to support tensor shape modifications and splitting
|
39 |
+
class LoraTorchTensor:
    """Factored (low-rank) stand-in for a weight tensor.

    The object stores two factors ``A`` and ``B`` and reports the shape of the
    product ``B @ A``. A small subset of tensor operations (indexing, reshape,
    permute/transpose, ``to``, and ``torch.stack`` / ``torch.cat`` /
    ``torch.permute`` / ``torch.reshape``) is implemented directly on the
    factors. Anything else raises ``NotImplementedError``.
    """

    _lora_A: Tensor  # (n_rank, row_size)
    _lora_B: Tensor  # (col_size, n_rank)
    _rank: int

    def __init__(self, A: Tensor, B: Tensor):
        """Wrap the factor pair; both are upcast to float32 if their dtypes differ."""
        assert len(A.shape) == len(B.shape)
        assert A.shape[-2] == B.shape[-1]
        if A.dtype != B.dtype:
            A = A.to(torch.float32)
            B = B.to(torch.float32)
        self._lora_A = A
        self._lora_B = B
        self._rank = B.shape[-1]

    def get_lora_A_B(self) -> tuple[Tensor, Tensor]:
        """Return the stored factors as ``(A, B)``."""
        return (self._lora_A, self._lora_B)

    def __getitem__(
        self,
        indices: (
            SupportsIndex
            | slice
            | tuple[SupportsIndex | slice | Tensor, ...]  # TODO: add ellipsis in the type signature
        ),
    ) -> LoraTorchTensor:
        """Index the represented tensor by indexing the factors.

        Raises:
            NotImplementedError: when the result would be a vector, or when
                the index type is not supported.
        """
        shape = self.shape
        if isinstance(indices, SupportsIndex):
            if len(shape) > 2:
                return LoraTorchTensor(self._lora_A[indices], self._lora_B[indices])
            else:
                raise NotImplementedError  # can't return a vector
        elif isinstance(indices, slice):
            if len(shape) > 2:
                return LoraTorchTensor(self._lora_A[indices], self._lora_B[indices])
            else:
                # slicing the first dim of a matrix only touches B
                return LoraTorchTensor(self._lora_A, self._lora_B[indices])
        elif isinstance(indices, tuple):
            assert len(indices) > 0
            if indices[-1] is Ellipsis:
                # a trailing ellipsis selects everything that remains
                remaining = indices[:-1]
                if len(remaining) == 0:
                    return LoraTorchTensor(self._lora_A, self._lora_B)
                return self[remaining]

            # expand ellipsis: it stands for every dimension that is not
            # addressed by an explicit entry of the index tuple
            n_explicit = sum(1 for i in indices if i is not Ellipsis)
            n_fill = max(len(shape) - n_explicit, 0)
            expanded: list[Any] = []
            for i in indices:
                if i is Ellipsis:
                    expanded.extend(slice(None, None) for _ in range(n_fill))
                else:
                    expanded.append(i)
            indices = tuple(expanded)

            if len(indices) < len(shape):
                indices = (*indices, *(slice(None, None) for _ in range(len(indices), len(shape))))

            # TODO: make sure this is correct
            indices_A = (
                *(
                    (
                        j.__index__() % self._lora_A.shape[i]
                        if isinstance(j, SupportsIndex)
                        else slice(None, None)
                    )
                    for i, j in enumerate(indices[:-2])
                ),
                slice(None, None),  # the rank dimension of A is never indexed
                indices[-1],
            )
            indices_B = indices[:-1]
            return LoraTorchTensor(self._lora_A[indices_A], self._lora_B[indices_B])
        else:
            raise NotImplementedError  # unknown indice type

    @property
    def dtype(self) -> torch.dtype:
        """Common dtype of both factors."""
        assert self._lora_A.dtype == self._lora_B.dtype
        return self._lora_A.dtype

    @property
    def shape(self) -> tuple[int, ...]:
        """Shape of the represented tensor: B's leading dims plus A's last dim."""
        assert len(self._lora_A.shape) == len(self._lora_B.shape)
        return (*self._lora_B.shape[:-1], self._lora_A.shape[-1])

    def size(self, dim=None):
        """Return the full shape, or the extent of ``dim`` when one is given."""
        if dim is None:
            return self.shape
        return self.shape[dim]

    def reshape(self, *shape: int | tuple[int, ...]) -> LoraTorchTensor:
        """Reshape the leading dimensions; the last dimension must stay unchanged.

        Accepts either separate ints or a single tuple/list. A ``-1`` entry is
        inferred from the element count.

        Raises:
            NotImplementedError: when the target has fewer than 2 dimensions
                or a different last dimension.
        """
        if isinstance(shape[0], (tuple, list)):
            new_shape: tuple[int, ...] = tuple(shape[0])
        else:
            new_shape = cast(tuple[int, ...], shape)
        orig_shape = self.shape
        if len(new_shape) < 2:
            raise NotImplementedError  # can't become a vector

        # expand -1 in the shape
        if any(dim == -1 for dim in new_shape):
            n_elems = prod(orig_shape)
            n_new_elems = prod(dim if dim != -1 else 1 for dim in new_shape)
            assert n_elems % n_new_elems == 0
            new_shape = (*(dim if dim != -1 else n_elems // n_new_elems for dim in new_shape),)

        if new_shape[-1] != orig_shape[-1]:
            raise NotImplementedError  # can't reshape the row size trivially

        # A gets size-1 leading dims; only B carries the new leading dims
        shape_A = (*(1 for _ in new_shape[:-2]), self._rank, orig_shape[-1])
        shape_B = (*new_shape[:-1], self._rank)
        return LoraTorchTensor(
            self._lora_A.reshape(shape_A),
            self._lora_B.reshape(shape_B),
        )

    def reshape_as(self, other: Tensor) -> LoraTorchTensor:
        """Reshape to the shape of ``other``."""
        return self.reshape(*other.shape)

    def view(self, *size: int) -> LoraTorchTensor:
        """Alias of :meth:`reshape`."""
        return self.reshape(*size)

    def permute(self, *dims: int) -> LoraTorchTensor:
        """Permute dimensions; dims may be given as ints or as one tuple/list.

        Two cases are supported: permutations that keep the last dimension in
        place, and the plain transpose of a 2-D tensor.

        Raises:
            NotImplementedError: for any other permutation.
        """
        if len(dims) == 1 and isinstance(dims[0], (tuple, list)):
            # torch.permute(t, dims) hands the dims over as a single sequence
            dims = tuple(dims[0])
        shape = self.shape
        # normalize to negative dims
        dims = tuple(dim - len(shape) if dim >= 0 else dim for dim in dims)
        if dims[-1] == -1:
            # TODO: support higher dimensional A shapes bigger than 1
            assert all(dim == 1 for dim in self._lora_A.shape[:-2])
            return LoraTorchTensor(self._lora_A, self._lora_B.permute(*dims))
        if len(shape) == 2 and dims[-1] == -2 and dims[-2] == -1:
            # transposing B @ A gives A^T @ B^T, so the factors swap roles
            return LoraTorchTensor(self._lora_B.permute(*dims), self._lora_A.permute(*dims))
        else:
            # TODO: compose the above two
            raise NotImplementedError

    def transpose(self, dim0: int, dim1: int) -> LoraTorchTensor:
        """Swap two dimensions (implemented through :meth:`permute`)."""
        order = list(range(len(self.shape)))
        order[dim0], order[dim1] = order[dim1], order[dim0]
        return self.permute(*order)

    def swapaxes(self, axis0: int, axis1: int) -> LoraTorchTensor:
        """Alias of :meth:`transpose`."""
        return self.transpose(axis0, axis1)

    def to(self, *args, **kwargs):
        """Apply ``Tensor.to`` with the same arguments to both factors."""
        return LoraTorchTensor(self._lora_A.to(*args, **kwargs), self._lora_B.to(*args, **kwargs))

    @classmethod
    def __torch_function__(cls, func: Callable, types, args=(), kwargs=None):
        """Handle the supported ``torch.*`` functions for this class.

        Supported: ``torch.permute``, ``torch.reshape``, and ``torch.stack`` /
        ``torch.cat`` along dim 0 only.

        Raises:
            NotImplementedError: for any other function, or for a 2-D
                ``torch.cat`` whose inputs do not all share the same A.
        """
        del types  # unused

        if kwargs is None:
            kwargs = {}

        if func is torch.permute:
            # dims may arrive positionally (as one sequence) or as dims=...
            dims = kwargs["dims"] if "dims" in kwargs else args[1:]
            return type(args[0]).permute(args[0], *dims)
        elif func is torch.reshape:
            if "shape" in kwargs:
                return type(args[0]).reshape(args[0], kwargs["shape"])
            return type(args[0]).reshape(*args)
        elif func is torch.stack:
            assert isinstance(args[0], Sequence)
            # honor a positional dim instead of silently assuming 0
            dim = args[1] if len(args) > 1 else kwargs.get("dim", 0)
            assert dim == 0
            return LoraTorchTensor(
                torch.stack([a._lora_A for a in args[0]], dim),
                torch.stack([b._lora_B for b in args[0]], dim),
            )
        elif func is torch.cat:
            assert isinstance(args[0], Sequence)
            # honor a positional dim instead of silently assuming 0
            dim = args[1] if len(args) > 1 else kwargs.get("dim", 0)
            assert dim == 0
            if len(args[0][0].shape) > 2:
                return LoraTorchTensor(
                    torch.cat([a._lora_A for a in args[0]], dim),
                    torch.cat([b._lora_B for b in args[0]], dim),
                )
            elif all(torch.equal(args[0][0]._lora_A, t._lora_A) for t in args[0][1:]):
                # identical A everywhere: concatenating along rows only grows B
                return LoraTorchTensor(
                    args[0][0]._lora_A,
                    torch.cat([b._lora_B for b in args[0]], dim),
                )
            else:
                raise NotImplementedError
        else:
            raise NotImplementedError
|
223 |
+
|
224 |
+
|
225 |
+
def get_base_tensor_name(lora_tensor_name: str) -> str:
    """Map a LoRA tensor name to the name of the base weight it applies to.

    Every occurrence of ``"base_model.model."`` is removed and the
    ``".lora_A.weight"`` / ``".lora_B.weight"`` markers are replaced by
    ``".weight"``. Names containing none of these are returned unchanged.
    """
    base_name = lora_tensor_name.replace("base_model.model.", "")
    for lora_marker in (".lora_A.weight", ".lora_B.weight"):
        base_name = base_name.replace(lora_marker, ".weight")
    return base_name
|
230 |
+
|
231 |
+
|
232 |
+
def parse_args() -> argparse.Namespace:
    """Build the command-line parser and parse ``sys.argv``.

    Returns:
        The parsed namespace with ``outfile``, ``outtype``, ``bigendian``,
        ``no_lazy``, ``verbose``, ``dry_run``, ``base`` and ``lora_path``.
    """
    parser = argparse.ArgumentParser(
        description="Convert a Hugging Face PEFT LoRA adapter to a GGUF file")
    parser.add_argument(
        "--outfile", type=Path,
        help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",
    )
    parser.add_argument(
        "--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "auto"], default="f16",
        help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type",
    )
    parser.add_argument(
        "--bigendian", action="store_true",
        help="model is executed on big endian machine",
    )
    parser.add_argument(
        "--no-lazy", action="store_true",
        help="use more RAM by computing all outputs before writing (use in case lazy evaluation is broken)",
    )
    parser.add_argument(
        "--verbose", action="store_true",
        help="increase output verbosity",
    )
    parser.add_argument(
        "--dry-run", action="store_true",
        help="only print out what will be done, without writing any new files",
    )
    parser.add_argument(
        "--base", type=Path,
        help="directory containing Hugging Face model config files (config.json, tokenizer.json) for the base model that the adapter is based on - only config is needed, actual model weights are not required. If base model is unspecified, it will be loaded from Hugging Face hub based on the adapter config",
    )
    parser.add_argument(
        "lora_path", type=Path,
        # the config file read by this script is adapter_config.json
        help="directory containing Hugging Face PEFT LoRA config (adapter_config.json) and weights (adapter_model.safetensors or adapter_model.bin)",
    )

    return parser.parse_args()
|
269 |
+
|
270 |
+
|
271 |
+
def load_hparams_from_hf(hf_model_id: str) -> dict[str, Any]:
    """Load a model config through ``AutoConfig`` and return it as a dict.

    Args:
        hf_model_id: identifier passed unchanged to
            ``AutoConfig.from_pretrained``.
    """
    # normally, adapter does not come with base model config, we need to load it from AutoConfig
    config = AutoConfig.from_pretrained(hf_model_id)
    return config.to_dict()
|
275 |
+
|
276 |
+
|
277 |
+
if __name__ == '__main__':
    # Script entry point: read the adapter weights and config, resolve the
    # base model hyperparameters, then write the adapter through a LoraModel
    # subclass of the matching base model class.
    args = parse_args()
    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

    ftype_map: dict[str, gguf.LlamaFileType] = {
        "f32": gguf.LlamaFileType.ALL_F32,
        "f16": gguf.LlamaFileType.MOSTLY_F16,
        "bf16": gguf.LlamaFileType.MOSTLY_BF16,
        "q8_0": gguf.LlamaFileType.MOSTLY_Q8_0,
        "auto": gguf.LlamaFileType.GUESSED,
    }

    ftype = ftype_map[args.outtype]

    dir_base_model: Path | None = args.base
    dir_lora: Path = args.lora_path
    lora_config = dir_lora / "adapter_config.json"
    input_model = dir_lora / "adapter_model.safetensors"

    if args.outfile is not None:
        fname_out = args.outfile
    else:
        # output in the same directory as the model by default
        fname_out = dir_lora

    if os.path.exists(input_model):
        # lazy import load_file only if lora is in safetensors format.
        from safetensors.torch import load_file

        lora_model = load_file(input_model, device="cpu")
    else:
        # fall back to the .bin weights when no safetensors file exists
        input_model = dir_lora / "adapter_model.bin"
        lora_model = torch.load(input_model, map_location="cpu", weights_only=True)

    # load LoRA config
    # (explicit UTF-8 so parsing does not depend on the platform default encoding)
    with open(lora_config, "r", encoding="utf-8") as f:
        lparams: dict[str, Any] = json.load(f)

    # load base model
    if dir_base_model is None:
        if "base_model_name_or_path" in lparams:
            model_id = lparams["base_model_name_or_path"]
            logger.info(f"Loading base model from Hugging Face: {model_id}")
            try:
                hparams = load_hparams_from_hf(model_id)
            except OSError as e:
                logger.error(f"Failed to load base model config: {e}")
                logger.error("Please try downloading the base model and add its path to --base")
                sys.exit(1)
        else:
            logger.error("'base_model_name_or_path' is not found in adapter_config.json")
            logger.error("Base model config is required. Please download the base model and add its path to --base")
            sys.exit(1)
    else:
        logger.info(f"Loading base model: {dir_base_model.name}")
        hparams = Model.load_hparams(dir_base_model)

    with torch.inference_mode():
        try:
            model_class = Model.from_model_architecture(hparams["architectures"][0])
        except NotImplementedError:
            logger.error(f"Model {hparams['architectures'][0]} is not supported")
            sys.exit(1)

        class LoraModel(model_class):
            # Subclass of the resolved base model class that yields the
            # adapter's tensors instead of base model weights.
            model_arch = model_class.model_arch

            lora_alpha: float

            def __init__(self, *args, dir_lora_model: Path, lora_alpha: float, **kwargs):

                super().__init__(*args, **kwargs)

                self.dir_model_card = dir_lora_model
                self.lora_alpha = float(lora_alpha)

            def set_vocab(self):
                # deliberately a no-op for the adapter
                pass

            def set_type(self):
                self.gguf_writer.add_type(gguf.GGUFType.ADAPTER)
                self.gguf_writer.add_string(gguf.Keys.Adapter.TYPE, "lora")

            def set_gguf_parameters(self):
                self.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, self.lora_alpha)

            def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
                # Never add extra tensors (e.g. rope_freqs) for LoRA adapters
                return ()

            def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
                # Pair up the lora_A / lora_B tensors by base tensor name and
                # yield one LoraTorchTensor per pair.
                tensor_map: dict[str, PartialLoraTensor] = {}

                for name, tensor in lora_model.items():
                    if self.lazy:
                        tensor = LazyTorchTensor.from_eager(tensor)
                    base_name = get_base_tensor_name(name)
                    is_lora_a = ".lora_A.weight" in name
                    is_lora_b = ".lora_B.weight" in name
                    if not is_lora_a and not is_lora_b:
                        if ".base_layer.weight" in name:
                            continue
                        logger.error(f"Unexpected name '{name}': Not a lora_A or lora_B tensor")
                        if ".embed_tokens.weight" in name or ".lm_head.weight" in name:
                            logger.error("Embeddings is present in the adapter. This can be due to new tokens added during fine tuning")
                            logger.error("Please refer to https://github.com/ggerganov/llama.cpp/pull/9948")
                        sys.exit(1)

                    if base_name in tensor_map:
                        if is_lora_a:
                            tensor_map[base_name].A = tensor
                        else:
                            tensor_map[base_name].B = tensor
                    else:
                        if is_lora_a:
                            tensor_map[base_name] = PartialLoraTensor(A=tensor)
                        else:
                            tensor_map[base_name] = PartialLoraTensor(B=tensor)

                for name, tensor in tensor_map.items():
                    # every base tensor needs both factors
                    assert tensor.A is not None
                    assert tensor.B is not None
                    yield (name, cast(torch.Tensor, LoraTorchTensor(tensor.A, tensor.B)))

            def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
                # Run the base class's tensor mapping on the LoraTorchTensor,
                # then emit the two factors under ".lora_a" / ".lora_b".
                dest = list(super().modify_tensors(data_torch, name, bid))
                # some archs may have the same tensor for lm_head and output (tie word embeddings)
                # in this case, adapters targeting lm_head will fail when using llama-export-lora
                # therefore, we ignore them for now
                # see: https://github.com/ggerganov/llama.cpp/issues/9065
                if name == "lm_head.weight" and len(dest) == 0:
                    raise ValueError("lm_head is present in adapter, but is ignored in base model")
                for dest_name, dest_data in dest:
                    assert isinstance(dest_data, LoraTorchTensor)
                    lora_a, lora_b = dest_data.get_lora_A_B()

                    yield (dest_name + ".lora_a", lora_a)
                    yield (dest_name + ".lora_b", lora_b)

        alpha: float = lparams["lora_alpha"]

        model_instance = LoraModel(
            dir_base_model,
            ftype,
            fname_out,
            is_big_endian=args.bigendian,
            use_temp_file=False,
            eager=args.no_lazy,
            dry_run=args.dry_run,
            dir_lora_model=dir_lora,
            lora_alpha=alpha,
            hparams=hparams,
        )

        logger.info("Exporting model...")
        model_instance.write()
        logger.info(f"Model successfully exported to {model_instance.fname_out}")
|