masanorihirano
committed on
Commit
•
c693f6c
1
Parent(s):
1b0d72c
test
Browse files
app.py
CHANGED
@@ -24,12 +24,30 @@ from peft import LoraConfig
|
|
24 |
from peft import PeftModel
|
25 |
from peft import get_peft_model
|
26 |
from peft import set_peft_model_state_dict
|
27 |
-
import
|
|
|
28 |
from transformers import PreTrainedModel
|
29 |
from transformers import PreTrainedTokenizerBase
|
30 |
|
31 |
-
|
32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
|
34 |
def load_lora_model(
|
35 |
model_path: str,
|
@@ -48,7 +66,7 @@ def load_lora_model(
|
|
48 |
device=device,
|
49 |
num_gpus=num_gpus,
|
50 |
max_gpu_memory=max_gpu_memory,
|
51 |
-
load_8bit=
|
52 |
cpu_offloading=cpu_offloading,
|
53 |
debug=debug,
|
54 |
)
|
|
|
24 |
from peft import PeftModel
|
25 |
from peft import get_peft_model
|
26 |
from peft import set_peft_model_state_dict
|
27 |
+
from transformers import LlamaForCausalLM
|
28 |
+
from transformers import LlamaTokenizer
|
29 |
from transformers import PreTrainedModel
|
30 |
from transformers import PreTrainedTokenizerBase
|
31 |
|
32 |
+
|
33 |
+
class LLaMAdapter(BaseAdapter):
|
34 |
+
"Model adapater for vicuna-v1.1"
|
35 |
+
|
36 |
+
def match(self, model_path: str):
|
37 |
+
return "llama" in model_path
|
38 |
+
|
39 |
+
def load_model(self, model_path: str, from_pretrained_kwargs: dict):
|
40 |
+
tokenizer = LlamaTokenizer.from_pretrained(model_path, use_fast=False)
|
41 |
+
model = LlamaForCausalLM.from_pretrained(
|
42 |
+
model_path,
|
43 |
+
low_cpu_mem_usage=True,
|
44 |
+
**from_pretrained_kwargs,
|
45 |
+
)
|
46 |
+
return model, tokenizer
|
47 |
+
|
48 |
+
|
49 |
+
model_adapters.insert(-1, LLaMAdapter())
|
50 |
+
|
51 |
|
52 |
def load_lora_model(
|
53 |
model_path: str,
|
|
|
66 |
device=device,
|
67 |
num_gpus=num_gpus,
|
68 |
max_gpu_memory=max_gpu_memory,
|
69 |
+
load_8bit=False,
|
70 |
cpu_offloading=cpu_offloading,
|
71 |
debug=debug,
|
72 |
)
|