masanorihirano committed
Commit b0edcab
1 Parent(s): 9b5911e
Files changed (1)
  1. app.py +7 -10
app.py CHANGED
@@ -26,9 +26,6 @@ from transformers import LlamaTokenizer
 from transformers import PreTrainedModel
 from transformers import PreTrainedTokenizerBase
 
-transformers.AutoTokenizer.from_pretrained = LlamaTokenizer.from_pretrained
-transformers.AutoModelForCausalLM.from_pretrained = LlamaForCausalLM.from_pretrained
-
 
 def load_lora_model(
     model_path: str,
@@ -41,13 +38,13 @@ def load_lora_model(
 ) -> Tuple[Union[PreTrainedModel, PeftModel], PreTrainedTokenizerBase]:
     model: Union[PreTrainedModel, PeftModel]
     tokenizer: PreTrainedTokenizerBase
-    model, tokenizer = load_model(
-        model_path=model_path,
-        device=device,
-        num_gpus=num_gpus,
-        max_gpu_memory=max_gpu_memory,
-        load_8bit=load_8bit,
-        debug=debug,
+    tokenizer = LlamaTokenizer.from_pretrained(model_path)
+    model = LlamaForCausalLM.from_pretrained(
+        model_path,
+        load_in_8bit=load_8bit,
+        device_map=device,
+        max_memory=max_gpu_memory,
+        torch_dtype=torch.float16,
     )
     if lora_weight is not None:
         # model = PeftModelForCausalLM.from_pretrained(model, model_path, **kwargs)
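
For context, a minimal sketch of what the loader amounts to after this commit: the monkey-patch of the transformers Auto* loaders is dropped in favor of calling the Llama classes directly. The signature below is abridged (the diff omits most of the parameter list), and the lora_weight branch is an assumption based on the commented-out peft call; only the tokenizer and model loading lines are taken verbatim from the diff.

# Sketch, not the full app.py. Abridged signature; the PeftModel call
# in the lora_weight branch is an assumption (the diff only shows a
# commented-out variant of it).
from typing import Optional, Tuple, Union

import torch
from peft import PeftModel
from transformers import (
    LlamaForCausalLM,
    LlamaTokenizer,
    PreTrainedModel,
    PreTrainedTokenizerBase,
)


def load_lora_model(
    model_path: str,
    lora_weight: Optional[str],
    device: str,
    load_8bit: bool,
    max_gpu_memory: Optional[dict],
) -> Tuple[Union[PreTrainedModel, PeftModel], PreTrainedTokenizerBase]:
    model: Union[PreTrainedModel, PeftModel]
    tokenizer: PreTrainedTokenizerBase
    # Direct Llama loading, replacing the removed monkey-patch of
    # AutoTokenizer / AutoModelForCausalLM.
    tokenizer = LlamaTokenizer.from_pretrained(model_path)
    model = LlamaForCausalLM.from_pretrained(
        model_path,
        load_in_8bit=load_8bit,     # 8-bit weights via bitsandbytes
        device_map=device,          # e.g. "auto"
        max_memory=max_gpu_memory,  # per-device cap, e.g. {0: "13GiB"}
        torch_dtype=torch.float16,  # half precision for non-quantized weights
    )
    if lora_weight is not None:
        # Assumed: attach the LoRA adapter with peft's standard loader.
        model = PeftModel.from_pretrained(model, lora_weight)
    return model, tokenizer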