File size: 1,578 Bytes
4900ca9 cd0f47e 4900ca9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
import sys
sys.path.insert(1, '/workspace/asr/peft/src')
# TODO: point the path above at the `src` directory of your local checkout of
# the lazy-lora fork of peft. Alternatively, install lazylora from source:
#   git clone git@github.com:Xianchao-Wu/peft.git
#   cd peft
#   python setup.py install
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel, PeftConfig
import os
import torch
#import ipdb; ipdb.set_trace()
# TODO: set cache_dir to the directory where the llama2-7bhf model is stored
# (or where you want it to be stored).
cache_dir = '/workspace/asr/peft/qlora'

# Directory that contains 'adapter_config.json' and 'adapter_model.bin'.
# It is taken from the current working directory, so launch the script from
# inside the adapter directory.
lazylora_dir = os.getcwd()

# The adapter config names the base checkpoint that the tokenizer and the
# model are loaded from.
config = PeftConfig.from_pretrained(lazylora_dir)
base_model_id = config.base_model_name_or_path

# Keyword arguments shared by the tokenizer loader and the model loader.
hub_kwargs = {'cache_dir': cache_dir, 'use_auth_token': True}

tokenizer = AutoTokenizer.from_pretrained(base_model_id, **hub_kwargs)

# 4-bit NF4 quantization with double quantization; compute dtype is bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,
    device_map='auto',
    **hub_kwargs,
)

# Parameter count of the quantized base model, before the adapter is attached.
# (print_trainable_parameters() is only called after the PeftModel wrap below.)
base_param_count = sum(weight.numel() for weight in model.parameters())
print(base_param_count)
# Recorded output: 6,671,979,520
# Original note: half-size of 7B due to 4-bit loading.
# NOTE(review): confirm which base model this figure was measured on.

# Wrap the base model with the lazy-lora adapter stored in lazylora_dir.
model = PeftModel.from_pretrained(model, lazylora_dir)
print('after adding lazy lora parameters:')
model.print_trainable_parameters()
# Recorded output:
# trainable params: 0 || all params: 6,922,290,688 || trainable%: 0.0
|