YenChunChen committed
Commit 3978796
1 Parent(s): 71625d6

config defaults to FA2, code snippet in README shows explicit argument in `from_pretrained`

Files changed (2)
  1. README.md +1 -1
  2. config.json +2 -1
README.md CHANGED
@@ -105,7 +105,7 @@ from transformers import AutoProcessor
 
  model_id = "microsoft/Phi-3-vision-128k-instruct"
 
- model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda", trust_remote_code=True, torch_dtype="auto", attn_implementation='flash_attention_2') # use attn_implementation='eager' to disable flash attention
+ model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda", trust_remote_code=True, torch_dtype="auto", _attn_implementation='flash_attention_2') # use _attn_implementation='eager' to disable flash attention
 
  processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
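For reference, the updated README snippet made self-contained (the `AutoModelForCausalLM` import is assumed to appear earlier in the README, outside this hunk; flash attention 2 additionally assumes a compatible CUDA GPU and the `flash-attn` package):

```python
from transformers import AutoModelForCausalLM, AutoProcessor

model_id = "microsoft/Phi-3-vision-128k-instruct"

# Explicit argument as shown in the README; pass _attn_implementation='eager'
# to disable flash attention, per the inline comment in the snippet above.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="cuda",
    trust_remote_code=True,
    torch_dtype="auto",
    _attn_implementation="flash_attention_2",
)

processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
```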
config.json CHANGED
@@ -143,5 +143,6 @@
   "torch_dtype": "bfloat16",
   "transformers_version": "4.38.1",
   "use_cache": true,
- "vocab_size": 32064
+ "vocab_size": 32064,
+ "_attn_implementation": "flash_attention_2"
  }
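Because the config now carries `"_attn_implementation": "flash_attention_2"`, loading the model without the explicit argument should also default to flash attention 2. A minimal sketch, assuming `flash-attn` is installed and a CUDA GPU is available (otherwise pass `_attn_implementation='eager'` as in the README snippet):

```python
from transformers import AutoModelForCausalLM

# No explicit _attn_implementation: the value stored in config.json is used.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-vision-128k-instruct",
    device_map="cuda",
    trust_remote_code=True,
    torch_dtype="auto",
)

# Expected to report "flash_attention_2" given the new config default.
print(model.config._attn_implementation)
```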