YenChunChen committed
Commit 3978796
Parent(s): 71625d6

config defaults to FA2, code snippet in README shows explicit argument in `from_pretrained`

- README.md +1 -1
- config.json +2 -1
README.md CHANGED
@@ -105,7 +105,7 @@ from transformers import AutoProcessor
 
 model_id = "microsoft/Phi-3-vision-128k-instruct"
 
-model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda", trust_remote_code=True, torch_dtype="auto",
+model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda", trust_remote_code=True, torch_dtype="auto", _attn_implementation='flash_attention_2') # use _attn_implementation='eager' to disable flash attention
 
 processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
 
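For reference, a minimal, self-contained sketch of how the updated README snippet is used, assuming `AutoModelForCausalLM` is imported from `transformers` alongside `AutoProcessor` (the import lines sit outside this hunk) and that the flash-attn package is installed on a supported GPU:

from transformers import AutoModelForCausalLM, AutoProcessor

model_id = "microsoft/Phi-3-vision-128k-instruct"

# Explicitly request FlashAttention-2, as the updated snippet does;
# pass _attn_implementation='eager' instead to disable flash attention.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="cuda",
    trust_remote_code=True,
    torch_dtype="auto",
    _attn_implementation="flash_attention_2",
)

processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)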
config.json CHANGED
@@ -143,5 +143,6 @@
   "torch_dtype": "bfloat16",
   "transformers_version": "4.38.1",
   "use_cache": true,
-  "vocab_size": 32064
+  "vocab_size": 32064,
+  "_attn_implementation": "flash_attention_2"
 }
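Since config.json now sets `_attn_implementation` to `flash_attention_2`, loading the model without the argument should pick up FlashAttention-2 by default. A minimal sketch of both behaviors, assuming the flash-attn package is installed and the GPU supports it:

from transformers import AutoModelForCausalLM

model_id = "microsoft/Phi-3-vision-128k-instruct"

# No _attn_implementation argument: the new config default
# ("flash_attention_2") is used automatically.
model = AutoModelForCausalLM.from_pretrained(
    model_id, device_map="cuda", trust_remote_code=True, torch_dtype="auto"
)

# The config default can still be overridden per load, e.g. falling back to the
# eager attention implementation on hardware without FlashAttention-2 support.
model_eager = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="cuda",
    trust_remote_code=True,
    torch_dtype="auto",
    _attn_implementation="eager",
)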