File size: 3,266 Bytes
5231633 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
import torch
from torch import nn
class LoRA(nn.Module):
def __init__(self, layer, name='weight', rank=16, alpha=1):
super().__init__()
weight = getattr(layer, name)
self.lora_down = nn.Parameter(torch.zeros((rank, weight.size(1))))
self.lora_up = nn.Parameter(torch.zeros((weight.size(0), rank)))
nn.init.normal_(self.lora_up, mean=0, std=1)
self.scale = alpha / rank
self.enabled = True
def forward(self, original_weights):
if self.enabled:
lora_shape = list(original_weights.shape[:2]) + [1] * (len(original_weights.shape) - 2)
lora_weights = torch.matmul(self.lora_up.clone(), self.lora_down.clone()).view(*lora_shape) * self.scale
return original_weights + lora_weights
else:
return original_weights
def apply_lora(model, filters=None, rank=16):
def check_parameter(module, name):
return hasattr(module, name) and not torch.nn.utils.parametrize.is_parametrized(module, name) and isinstance(
getattr(module, name), nn.Parameter)
for name, module in model.named_modules():
if filters is None or any([f in name for f in filters]):
if check_parameter(module, "weight"):
device, dtype = module.weight.device, module.weight.dtype
torch.nn.utils.parametrize.register_parametrization(module, 'weight', LoRA(module, "weight", rank=rank).to(dtype).to(device))
elif check_parameter(module, "in_proj_weight"):
device, dtype = module.in_proj_weight.device, module.in_proj_weight.dtype
torch.nn.utils.parametrize.register_parametrization(module, 'in_proj_weight', LoRA(module, "in_proj_weight", rank=rank).to(dtype).to(device))
class ReToken(nn.Module):
def __init__(self, indices=None):
super().__init__()
assert indices is not None
self.embeddings = nn.Parameter(torch.zeros(len(indices), 1280))
self.register_buffer('indices', torch.tensor(indices))
self.enabled = True
def forward(self, embeddings):
if self.enabled:
embeddings = embeddings.clone()
for i, idx in enumerate(self.indices):
embeddings[idx] += self.embeddings[i]
return embeddings
def apply_retoken(module, indices=None):
def check_parameter(module, name):
return hasattr(module, name) and not torch.nn.utils.parametrize.is_parametrized(module, name) and isinstance(
getattr(module, name), nn.Parameter)
if check_parameter(module, "weight"):
device, dtype = module.weight.device, module.weight.dtype
torch.nn.utils.parametrize.register_parametrization(module, 'weight', ReToken(indices=indices).to(dtype).to(device))
def remove_lora(model, leave_parametrized=True):
for module in model.modules():
if torch.nn.utils.parametrize.is_parametrized(module, "weight"):
nn.utils.parametrize.remove_parametrizations(module, "weight", leave_parametrized=leave_parametrized)
elif torch.nn.utils.parametrize.is_parametrized(module, "in_proj_weight"):
nn.utils.parametrize.remove_parametrizations(module, "in_proj_weight", leave_parametrized=leave_parametrized)
|