masanorihirano committed
Commit c693f6c
Parent: 1b0d72c
Files changed (1): app.py (+22, -4)
app.py CHANGED
@@ -24,12 +24,30 @@ from peft import LoraConfig
 from peft import PeftModel
 from peft import get_peft_model
 from peft import set_peft_model_state_dict
-import transformers
+from transformers import LlamaForCausalLM
+from transformers import LlamaTokenizer
 from transformers import PreTrainedModel
 from transformers import PreTrainedTokenizerBase
 
-transformers.AutoTokenizer = transformers.LlamaTokenizer
-transformers.AutoModelForCausalLM = transformers.LlamaForCausalLM
+
+class LLaMAdapter(BaseAdapter):
+    "Model adapter for LLaMA models"
+
+    def match(self, model_path: str):
+        return "llama" in model_path
+
+    def load_model(self, model_path: str, from_pretrained_kwargs: dict):
+        tokenizer = LlamaTokenizer.from_pretrained(model_path, use_fast=False)
+        model = LlamaForCausalLM.from_pretrained(
+            model_path,
+            low_cpu_mem_usage=True,
+            **from_pretrained_kwargs,
+        )
+        return model, tokenizer
+
+
+model_adapters.insert(-1, LLaMAdapter())
+
 
 def load_lora_model(
     model_path: str,
@@ -48,7 +66,7 @@ def load_lora_model(
         device=device,
         num_gpus=num_gpus,
         max_gpu_memory=max_gpu_memory,
-        load_8bit=load_8bit,
+        load_8bit=False,
         cpu_offloading=cpu_offloading,
         debug=debug,
     )
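The new LLaMAdapter hooks into FastChat's model-adapter registry: model_adapters is scanned in order, and, judging by the BaseAdapter base class and the insert(-1, ...) call, its last entry is a catch-all that matches every path, so new adapters must go in front of it rather than being appended. Below is a minimal self-contained sketch of that dispatch; the registry, fallback, and get_model_adapter here are local stand-ins written from that assumption, not code from this commit or verbatim FastChat internals.

from typing import List


class BaseAdapter:
    "Catch-all fallback: matches every model path (assumed FastChat behaviour)."

    def match(self, model_path: str) -> bool:
        return True


model_adapters: List[BaseAdapter] = [BaseAdapter()]  # fallback stays last


def get_model_adapter(model_path: str) -> BaseAdapter:
    # The first adapter whose match() accepts the path wins, so registration
    # order matters; insert(-1, ...) slots a new adapter in front of the
    # catch-all without displacing earlier, more specific adapters.
    for adapter in model_adapters:
        if adapter.match(model_path):
            return adapter
    raise ValueError(f"No adapter matches {model_path}")

The second hunk is independent of the adapter: it replaces the caller-supplied load_8bit flag with a hardcoded load_8bit=False, so this load path now always loads full-precision weights regardless of what the caller requests.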