import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import LlamaForCausalLM

from .configuration_oursvd_llama import CovSVDLlamaConfig

class CovSVDLinear(nn.Module):
    """Linear layer factored as a trainable low-rank product plus a frozen dense residual.

    The forward pass computes y = A(B(x)) + x @ R^T, where A and B are the
    trainable rank-`rank` factors and R (`weight_residual`) stays frozen.
    """

    def __init__(self, in_features, out_features, rank, bias=True):
        super().__init__()
        # Trainable low-rank factors: B projects down to `rank` dimensions,
        # A projects back up to `out_features`.
        self.BLinear = nn.Linear(in_features, rank, bias=False)
        self.ALinear = nn.Linear(rank, out_features, bias=bias)
        # Frozen dense residual holding whatever the low-rank product misses.
        self.weight_residual = nn.Parameter(
            torch.zeros(out_features, in_features), requires_grad=False
        )

    def forward(self, input):
        # Low-rank path ...
        y = self.BLinear(input)
        # ... plus the frozen residual path.
        y = self.ALinear(y) + F.linear(input, self.weight_residual)
        return y

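
# A minimal sketch (not part of the original module) of how the low-rank
# factors and the frozen residual could be populated from a pretrained dense
# layer. The helper name and the plain truncated-SVD split are illustrative
# assumptions; the actual CovSVD decomposition may weight the SVD differently.
@torch.no_grad()
def build_covsvd_linear(dense: nn.Linear, rank: int) -> CovSVDLinear:
    layer = CovSVDLinear(
        dense.in_features, dense.out_features, rank, bias=dense.bias is not None
    )
    # Truncated SVD of the dense weight: W ~= U_r diag(S_r) Vh_r.
    U, S, Vh = torch.linalg.svd(dense.weight.float(), full_matrices=False)
    sqrt_s = S[:rank].sqrt()
    # Split the singular values evenly between the two factors.
    layer.BLinear.weight.copy_(sqrt_s[:, None] * Vh[:rank])    # (rank, in)
    layer.ALinear.weight.copy_(U[:, :rank] * sqrt_s[None, :])  # (out, rank)
    if dense.bias is not None:
        layer.ALinear.bias.copy_(dense.bias)
    # The frozen residual restores exact equality: W = A @ B + R.
    layer.weight_residual.copy_(
        dense.weight.float() - layer.ALinear.weight @ layer.BLinear.weight
    )
    return layer
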

class CovSVDLlamaForCausalLM(LlamaForCausalLM):
    config_class = CovSVDLlamaConfig

    def __init__(self, config: CovSVDLlamaConfig):
        super().__init__(config)

        self.lora_r = config.lora_r

        # Map each submodule object to its fully qualified name, so replaced
        # layers can still be identified by name below.
        full_name_dict = {module: name for name, module in self.named_modules()}

        # Walk the module tree and record every nn.Linear together with its
        # parent module and attribute name, so it can be swapped in place.
        linear_info = {}
        modules = [self]
        while len(modules) > 0:
            submodule = modules.pop()
            for name, raw_linear in submodule.named_children():
                if isinstance(raw_linear, nn.Linear):
                    full_name = full_name_dict[raw_linear]
                    linear_info[raw_linear] = {
                        "father": submodule,
                        "name": name,
                        "full_name": full_name,
                    }
                else:
                    modules.append(raw_linear)

        # Replace every linear layer except the output head with a rank-r
        # CovSVDLinear. Iterating over the snapshot collected above (rather
        # than over named_modules()) avoids mutating the tree mid-traversal.
        for module, info in linear_info.items():
            if "lm_head" in info["full_name"]:
                continue
            new_layer = CovSVDLinear(
                module.in_features,
                module.out_features,
                self.lora_r,
                bias=module.bias is not None,
            )
            setattr(info["father"], info["name"], new_layer)
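
if __name__ == "__main__":
    # Quick smoke test / usage sketch. The config sizes are illustrative
    # assumptions, not a released checkpoint, and this assumes
    # CovSVDLlamaConfig accepts `lora_r` as a keyword, as the
    # `config.lora_r` access above implies. Because of the relative config
    # import, run this as a module (python -m <package>.<this_module>).
    config = CovSVDLlamaConfig(
        hidden_size=256,
        intermediate_size=512,
        num_hidden_layers=2,
        num_attention_heads=4,
        lora_r=8,
    )
    model = CovSVDLlamaForCausalLM(config)
    # Every non-lm_head linear should now be a rank-8 CovSVDLinear.
    print(model.model.layers[0].self_attn.q_proj)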