vl-7b-4bit

Runtime error

minhdang committed on Mar 21

Commit

1bda56f

•

1 Parent(s): 659b257

Update inference.py

Files changed (1) hide show

inference.py CHANGED Viewed

@@ -35,17 +35,14 @@ from deepseek_vl.utils.conversation import Conversation
 from transformers import BitsAndBytesConfig
-nf8_config = BitsAndBytesConfig(
-    load_in_8bit=True,
-    bnb_8bit_quant_type="nf8",
-)
 def load_model(model_path):
     vl_chat_processor: VLChatProcessor = VLChatProcessor.from_pretrained(model_path)
     tokenizer = vl_chat_processor.tokenizer
     vl_gpt: MultiModalityCausalLM = AutoModelForCausalLM.from_pretrained(
-        model_path, trust_remote_code=True, quantization_config=nf8_config,low_cpu_mem_usage=True
     )
-    vl_gpt = vl_gpt.to(torch.bfloat16).cuda().eval()
     return tokenizer, vl_gpt, vl_chat_processor

 from transformers import BitsAndBytesConfig
 def load_model(model_path):
     vl_chat_processor: VLChatProcessor = VLChatProcessor.from_pretrained(model_path)
     tokenizer = vl_chat_processor.tokenizer
     vl_gpt: MultiModalityCausalLM = AutoModelForCausalLM.from_pretrained(
+        model_path, trust_remote_code=True, load_in_8bit=True,low_cpu_mem_usage=True
     )
+    vl_gpt = vl_gpt.cuda().eval()
     return tokenizer, vl_gpt, vl_chat_processor