YenChunChen committed
Commit 3978796
Parent(s): 71625d6

config defaults to FA2, code snippet in README shows explicit argument in `from_pretrained`

- README.md +1 -1
- config.json +2 -1
README.md CHANGED
@@ -105,7 +105,7 @@ from transformers import AutoProcessor
 
 model_id = "microsoft/Phi-3-vision-128k-instruct"
 
-model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda", trust_remote_code=True, torch_dtype="auto",
+model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda", trust_remote_code=True, torch_dtype="auto", _attn_implementation='flash_attention_2') # use _attn_implementation='eager' to disable flash attention
 
 processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
 
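For reference, a minimal, self-contained sketch of how the updated README snippet is used, assuming `AutoModelForCausalLM` is imported from `transformers` alongside `AutoProcessor` (the import lines sit outside this hunk) and that the flash-attn package is installed on a supported GPU:

from transformers import AutoModelForCausalLM, AutoProcessor

model_id = "microsoft/Phi-3-vision-128k-instruct"

# Explicitly request FlashAttention-2, as the updated snippet does;
# pass _attn_implementation='eager' instead to disable flash attention.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="cuda",
    trust_remote_code=True,
    torch_dtype="auto",
    _attn_implementation="flash_attention_2",
)

processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)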
config.json CHANGED
@@ -143,5 +143,6 @@
   "torch_dtype": "bfloat16",
   "transformers_version": "4.38.1",
   "use_cache": true,
-  "vocab_size": 32064
+  "vocab_size": 32064,
+  "_attn_implementation": "flash_attention_2"
 }
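Since config.json now sets `_attn_implementation` to `flash_attention_2`, loading the model without the argument should pick up FlashAttention-2 by default. A minimal sketch of both behaviors, assuming the flash-attn package is installed and the GPU supports it:

from transformers import AutoModelForCausalLM

model_id = "microsoft/Phi-3-vision-128k-instruct"

# No _attn_implementation argument: the new config default
# ("flash_attention_2") is used automatically.
model = AutoModelForCausalLM.from_pretrained(
    model_id, device_map="cuda", trust_remote_code=True, torch_dtype="auto"
)

# The config default can still be overridden per load, e.g. falling back to the
# eager attention implementation on hardware without FlashAttention-2 support.
model_eager = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="cuda",
    trust_remote_code=True,
    torch_dtype="auto",
    _attn_implementation="eager",
)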