minhdang commited on
Commit
1bda56f
1 Parent(s): 659b257

Update inference.py

Browse files
Files changed (1) hide show
  1. inference.py +3 -6
inference.py CHANGED
@@ -35,17 +35,14 @@ from deepseek_vl.utils.conversation import Conversation
35
 
36
  from transformers import BitsAndBytesConfig
37
 
38
# 8-bit bitsandbytes quantization config shared by load_model().
# BUGFIX: the original passed bnb_8bit_quant_type="nf8", but no such
# option exists — BitsAndBytesConfig has no bnb_8bit_quant_type
# parameter, and the "nf4"/"fp4" quant types apply to 4-bit loading
# only (bnb_4bit_quant_type). Plain load_in_8bit=True is the valid
# 8-bit configuration.
nf8_config = BitsAndBytesConfig(
    load_in_8bit=True,
)
42
def load_model(model_path):
    """Load the DeepSeek-VL chat processor, tokenizer and 8-bit model.

    Args:
        model_path: Hugging Face hub id or local directory of the
            pretrained DeepSeek-VL checkpoint.

    Returns:
        Tuple of (tokenizer, vl_gpt, vl_chat_processor).
    """
    vl_chat_processor: VLChatProcessor = VLChatProcessor.from_pretrained(model_path)
    tokenizer = vl_chat_processor.tokenizer
    vl_gpt: MultiModalityCausalLM = AutoModelForCausalLM.from_pretrained(
        model_path,
        trust_remote_code=True,
        quantization_config=nf8_config,
        low_cpu_mem_usage=True,
    )
    # BUGFIX: the original chained .to(torch.bfloat16).cuda() here, but
    # transformers rejects casting/moving a bitsandbytes 8-bit model
    # (".to is not supported for 8-bit ... models"); the quantized
    # weights are already placed on GPU at load time by accelerate.
    vl_gpt = vl_gpt.eval()
    return tokenizer, vl_gpt, vl_chat_processor
50
 
51
 
 
35
 
36
  from transformers import BitsAndBytesConfig
37
 
38
+
 
 
 
39
def load_model(model_path):
    """Load the DeepSeek-VL chat processor, tokenizer and 8-bit model.

    Args:
        model_path: Hugging Face hub id or local directory of the
            pretrained DeepSeek-VL checkpoint.

    Returns:
        Tuple of (tokenizer, vl_gpt, vl_chat_processor).
    """
    vl_chat_processor: VLChatProcessor = VLChatProcessor.from_pretrained(model_path)
    tokenizer = vl_chat_processor.tokenizer
    # Passing load_in_8bit=True directly to from_pretrained is deprecated;
    # an explicit BitsAndBytesConfig is the supported equivalent.
    vl_gpt: MultiModalityCausalLM = AutoModelForCausalLM.from_pretrained(
        model_path,
        trust_remote_code=True,
        quantization_config=BitsAndBytesConfig(load_in_8bit=True),
        device_map="auto",  # let accelerate place the quantized weights on GPU
        low_cpu_mem_usage=True,
    )
    # BUGFIX: the original called .cuda() here, which transformers rejects
    # for 8-bit quantized models ("Calling `cuda()` is not supported for
    # 8-bit quantized models"); device placement is handled by device_map.
    vl_gpt = vl_gpt.eval()
    return tokenizer, vl_gpt, vl_chat_processor
47
 
48