minhdang committed on
Commit
f226eec
1 Parent(s): efa875e

Update inference.py

Browse files
Files changed (1) hide show
  1. inference.py +4 -2
inference.py CHANGED
@@ -36,13 +36,15 @@ from deepseek_vl.utils.conversation import Conversation
36
  from transformers import BitsAndBytesConfig
37
  from transformers import QuantoConfig
38
 
39
- quanto_config = QuantoConfig(weights="int4")
40
 
41
  def load_model(model_path):
42
  vl_chat_processor: VLChatProcessor = VLChatProcessor.from_pretrained(model_path)
43
  tokenizer = vl_chat_processor.tokenizer
44
  vl_gpt: MultiModalityCausalLM = AutoModelForCausalLM.from_pretrained(
45
- model_path, trust_remote_code=True, quantization_config = quanto_config,low_cpu_mem_usage=True
 
 
46
  )
47
 
48
  vl_gpt = vl_gpt.to(torch.bfloat16).cuda().eval()
 
36
  from transformers import BitsAndBytesConfig
37
  from transformers import QuantoConfig
38
 
39
+ # quanto_config = QuantoConfig(weights="int4")
40
 
41
  def load_model(model_path):
42
  vl_chat_processor: VLChatProcessor = VLChatProcessor.from_pretrained(model_path)
43
  tokenizer = vl_chat_processor.tokenizer
44
  vl_gpt: MultiModalityCausalLM = AutoModelForCausalLM.from_pretrained(
45
+ model_path, trust_remote_code=True,
46
+ # quantization_config = quanto_config,
47
+ low_cpu_mem_usage=True
48
  )
49
 
50
  vl_gpt = vl_gpt.to(torch.bfloat16).cuda().eval()